1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/descriptor.h>
36 
37 #include <algorithm>
38 #include <array>
39 #include <functional>
40 #include <limits>
41 #include <map>
42 #include <memory>
43 #include <set>
44 #include <string>
45 #include <unordered_map>
46 #include <unordered_set>
47 #include <vector>
48 
49 #include <google/protobuf/stubs/common.h>
50 #include <google/protobuf/stubs/logging.h>
51 #include <google/protobuf/stubs/stringprintf.h>
52 #include <google/protobuf/stubs/strutil.h>
53 #include <google/protobuf/any.h>
54 #include <google/protobuf/descriptor.pb.h>
55 #include <google/protobuf/io/coded_stream.h>
56 #include <google/protobuf/io/tokenizer.h>
57 #include <google/protobuf/io/zero_copy_stream_impl.h>
58 #include <google/protobuf/descriptor_database.h>
59 #include <google/protobuf/dynamic_message.h>
60 #include <google/protobuf/generated_message_util.h>
61 #include <google/protobuf/text_format.h>
62 #include <google/protobuf/unknown_field_set.h>
63 #include <google/protobuf/wire_format.h>
64 #include <google/protobuf/stubs/casts.h>
65 #include <google/protobuf/stubs/substitute.h>
66 #include <google/protobuf/io/strtod.h>
67 #include <google/protobuf/stubs/map_util.h>
68 #include <google/protobuf/stubs/stl_util.h>
69 #include <google/protobuf/stubs/hash.h>
70 
71 #undef PACKAGE  // autoheader #defines this.  :(
72 
73 
74 #include <google/protobuf/port_def.inc>
75 
76 namespace google {
77 namespace protobuf {
78 
79 class Symbol {
80  public:
81   enum Type {
82     NULL_SYMBOL,
83     MESSAGE,
84     FIELD,
85     ONEOF,
86     ENUM,
87     ENUM_VALUE,
88     ENUM_VALUE_OTHER_PARENT,
89     SERVICE,
90     METHOD,
91     PACKAGE,
92     QUERY_KEY
93   };
94 
Symbol()95   Symbol() : ptr_(nullptr) {}
96 
97   // Every object we store derives from internal::SymbolBase, where we store the
98   // symbol type enum.
99   // Storing in the object can be done without using more space in most cases,
100   // while storing it in the Symbol type would require 8 bytes.
101 #define DEFINE_MEMBERS(TYPE, TYPE_CONSTANT, FIELD)                             \
102   explicit Symbol(TYPE* value) : ptr_(value) {                                 \
103     value->symbol_type_ = TYPE_CONSTANT;                                       \
104   }                                                                            \
105   const TYPE* FIELD() const {                                                  \
106     return type() == TYPE_CONSTANT ? static_cast<const TYPE*>(ptr_) : nullptr; \
107   }
108 
109   DEFINE_MEMBERS(Descriptor, MESSAGE, descriptor)
110   DEFINE_MEMBERS(FieldDescriptor, FIELD, field_descriptor)
111   DEFINE_MEMBERS(OneofDescriptor, ONEOF, oneof_descriptor)
112   DEFINE_MEMBERS(EnumDescriptor, ENUM, enum_descriptor)
113   DEFINE_MEMBERS(ServiceDescriptor, SERVICE, service_descriptor)
114   DEFINE_MEMBERS(MethodDescriptor, METHOD, method_descriptor)
115 
116   // We use a special node for FileDescriptor.
117   // It is potentially added to the table with multiple different names, so we
118   // need a separate place to put the name.
119   struct Package : internal::SymbolBase {
120     const std::string* name;
121     const FileDescriptor* file;
122   };
DEFINE_MEMBERS(Package,PACKAGE,package_file_descriptor)123   DEFINE_MEMBERS(Package, PACKAGE, package_file_descriptor)
124 
125   // Enum values have two different parents.
126   // We use two different identitied for the same object to determine the two
127   // different insertions in the map.
128   static Symbol EnumValue(EnumValueDescriptor* value, int n) {
129     Symbol s;
130     internal::SymbolBase* ptr;
131     if (n == 0) {
132       ptr = static_cast<internal::SymbolBaseN<0>*>(value);
133       ptr->symbol_type_ = ENUM_VALUE;
134     } else {
135       ptr = static_cast<internal::SymbolBaseN<1>*>(value);
136       ptr->symbol_type_ = ENUM_VALUE_OTHER_PARENT;
137     }
138     s.ptr_ = ptr;
139     return s;
140   }
141 
enum_value_descriptor() const142   const EnumValueDescriptor* enum_value_descriptor() const {
143     return type() == ENUM_VALUE
144                ? static_cast<const EnumValueDescriptor*>(
145                      static_cast<const internal::SymbolBaseN<0>*>(ptr_))
146            : type() == ENUM_VALUE_OTHER_PARENT
147                ? static_cast<const EnumValueDescriptor*>(
148                      static_cast<const internal::SymbolBaseN<1>*>(ptr_))
149                : nullptr;
150   }
151 
152   // Not a real symbol.
153   // Only used for heterogeneous lookups and never actually inserted in the
154   // tables.
155   struct QueryKey : internal::SymbolBase {
156     StringPiece name;
157     const void* parent;
158   };
159   DEFINE_MEMBERS(QueryKey, QUERY_KEY, query_key);
160 #undef DEFINE_MEMBERS
161 
type() const162   Type type() const {
163     return ptr_ == nullptr ? NULL_SYMBOL
164                            : static_cast<Type>(ptr_->symbol_type_);
165   }
IsNull() const166   bool IsNull() const { return type() == NULL_SYMBOL; }
IsType() const167   bool IsType() const { return type() == MESSAGE || type() == ENUM; }
IsAggregate() const168   bool IsAggregate() const {
169     return type() == MESSAGE || type() == PACKAGE || type() == ENUM ||
170            type() == SERVICE;
171   }
172 
GetFile() const173   const FileDescriptor* GetFile() const {
174     switch (type()) {
175       case MESSAGE:
176         return descriptor()->file();
177       case FIELD:
178         return field_descriptor()->file();
179       case ONEOF:
180         return oneof_descriptor()->containing_type()->file();
181       case ENUM:
182         return enum_descriptor()->file();
183       case ENUM_VALUE:
184         return enum_value_descriptor()->type()->file();
185       case SERVICE:
186         return service_descriptor()->file();
187       case METHOD:
188         return method_descriptor()->service()->file();
189       case PACKAGE:
190         return package_file_descriptor()->file;
191       default:
192         return nullptr;
193     }
194   }
195 
full_name() const196   StringPiece full_name() const {
197     switch (type()) {
198       case MESSAGE:
199         return descriptor()->full_name();
200       case FIELD:
201         return field_descriptor()->full_name();
202       case ONEOF:
203         return oneof_descriptor()->full_name();
204       case ENUM:
205         return enum_descriptor()->full_name();
206       case ENUM_VALUE:
207         return enum_value_descriptor()->full_name();
208       case SERVICE:
209         return service_descriptor()->full_name();
210       case METHOD:
211         return method_descriptor()->full_name();
212       case PACKAGE:
213         return *package_file_descriptor()->name;
214       case QUERY_KEY:
215         return query_key()->name;
216       default:
217         GOOGLE_CHECK(false);
218     }
219     return "";
220   }
221 
parent_key() const222   std::pair<const void*, StringPiece> parent_key() const {
223     const auto or_file = [&](const void* p) { return p ? p : GetFile(); };
224     switch (type()) {
225       case MESSAGE:
226         return {or_file(descriptor()->containing_type()), descriptor()->name()};
227       case FIELD: {
228         auto* field = field_descriptor();
229         return {or_file(field->is_extension() ? field->extension_scope()
230                                               : field->containing_type()),
231                 field->name()};
232       }
233       case ONEOF:
234         return {oneof_descriptor()->containing_type(),
235                 oneof_descriptor()->name()};
236       case ENUM:
237         return {or_file(enum_descriptor()->containing_type()),
238                 enum_descriptor()->name()};
239       case ENUM_VALUE:
240         return {or_file(enum_value_descriptor()->type()->containing_type()),
241                 enum_value_descriptor()->name()};
242       case ENUM_VALUE_OTHER_PARENT:
243         return {enum_value_descriptor()->type(),
244                 enum_value_descriptor()->name()};
245       case SERVICE:
246         return {GetFile(), service_descriptor()->name()};
247       case METHOD:
248         return {method_descriptor()->service(), method_descriptor()->name()};
249       case QUERY_KEY:
250         return {query_key()->parent, query_key()->name};
251       default:
252         GOOGLE_CHECK(false);
253     }
254     return {};
255   }
256 
257  private:
258   const internal::SymbolBase* ptr_;
259 };
260 
261 const FieldDescriptor::CppType
262     FieldDescriptor::kTypeToCppTypeMap[MAX_TYPE + 1] = {
263         static_cast<CppType>(0),  // 0 is reserved for errors
264 
265         CPPTYPE_DOUBLE,   // TYPE_DOUBLE
266         CPPTYPE_FLOAT,    // TYPE_FLOAT
267         CPPTYPE_INT64,    // TYPE_INT64
268         CPPTYPE_UINT64,   // TYPE_UINT64
269         CPPTYPE_INT32,    // TYPE_INT32
270         CPPTYPE_UINT64,   // TYPE_FIXED64
271         CPPTYPE_UINT32,   // TYPE_FIXED32
272         CPPTYPE_BOOL,     // TYPE_BOOL
273         CPPTYPE_STRING,   // TYPE_STRING
274         CPPTYPE_MESSAGE,  // TYPE_GROUP
275         CPPTYPE_MESSAGE,  // TYPE_MESSAGE
276         CPPTYPE_STRING,   // TYPE_BYTES
277         CPPTYPE_UINT32,   // TYPE_UINT32
278         CPPTYPE_ENUM,     // TYPE_ENUM
279         CPPTYPE_INT32,    // TYPE_SFIXED32
280         CPPTYPE_INT64,    // TYPE_SFIXED64
281         CPPTYPE_INT32,    // TYPE_SINT32
282         CPPTYPE_INT64,    // TYPE_SINT64
283 };
284 
285 const char* const FieldDescriptor::kTypeToName[MAX_TYPE + 1] = {
286     "ERROR",  // 0 is reserved for errors
287 
288     "double",    // TYPE_DOUBLE
289     "float",     // TYPE_FLOAT
290     "int64",     // TYPE_INT64
291     "uint64",    // TYPE_UINT64
292     "int32",     // TYPE_INT32
293     "fixed64",   // TYPE_FIXED64
294     "fixed32",   // TYPE_FIXED32
295     "bool",      // TYPE_BOOL
296     "string",    // TYPE_STRING
297     "group",     // TYPE_GROUP
298     "message",   // TYPE_MESSAGE
299     "bytes",     // TYPE_BYTES
300     "uint32",    // TYPE_UINT32
301     "enum",      // TYPE_ENUM
302     "sfixed32",  // TYPE_SFIXED32
303     "sfixed64",  // TYPE_SFIXED64
304     "sint32",    // TYPE_SINT32
305     "sint64",    // TYPE_SINT64
306 };
307 
308 const char* const FieldDescriptor::kCppTypeToName[MAX_CPPTYPE + 1] = {
309     "ERROR",  // 0 is reserved for errors
310 
311     "int32",    // CPPTYPE_INT32
312     "int64",    // CPPTYPE_INT64
313     "uint32",   // CPPTYPE_UINT32
314     "uint64",   // CPPTYPE_UINT64
315     "double",   // CPPTYPE_DOUBLE
316     "float",    // CPPTYPE_FLOAT
317     "bool",     // CPPTYPE_BOOL
318     "enum",     // CPPTYPE_ENUM
319     "string",   // CPPTYPE_STRING
320     "message",  // CPPTYPE_MESSAGE
321 };
322 
323 const char* const FieldDescriptor::kLabelToName[MAX_LABEL + 1] = {
324     "ERROR",  // 0 is reserved for errors
325 
326     "optional",  // LABEL_OPTIONAL
327     "required",  // LABEL_REQUIRED
328     "repeated",  // LABEL_REPEATED
329 };
330 
SyntaxName(FileDescriptor::Syntax syntax)331 const char* FileDescriptor::SyntaxName(FileDescriptor::Syntax syntax) {
332   switch (syntax) {
333     case SYNTAX_PROTO2:
334       return "proto2";
335     case SYNTAX_PROTO3:
336       return "proto3";
337     case SYNTAX_UNKNOWN:
338       return "unknown";
339   }
340   GOOGLE_LOG(FATAL) << "can't reach here.";
341   return nullptr;
342 }
343 
// Full name of the message type used as a replacement.
// NOTE(review): judging by the identifier, this stands in for weak message
// types that are not linked into the binary -- confirm at the use sites.
static const char* const kNonLinkedWeakMessageReplacementName =
    "google.protobuf.Empty";
345 
346 #if !defined(_MSC_VER) || _MSC_VER >= 1900
347 const int FieldDescriptor::kMaxNumber;
348 const int FieldDescriptor::kFirstReservedNumber;
349 const int FieldDescriptor::kLastReservedNumber;
350 #endif
351 
352 namespace {
353 
354 // Note:  I distrust ctype.h due to locales.
char ToUpper(char ch) {
  // Only the ASCII range 'a'..'z' is mapped; every other byte is returned
  // unchanged, independent of the current locale.
  if (ch < 'a' || ch > 'z') {
    return ch;
  }
  return ch - 'a' + 'A';
}
358 
char ToLower(char ch) {
  // Only the ASCII range 'A'..'Z' is mapped; every other byte is returned
  // unchanged, independent of the current locale.
  if (ch < 'A' || ch > 'Z') {
    return ch;
  }
  return ch - 'A' + 'a';
}
362 
ToCamelCase(const std::string & input,bool lower_first)363 std::string ToCamelCase(const std::string& input, bool lower_first) {
364   bool capitalize_next = !lower_first;
365   std::string result;
366   result.reserve(input.size());
367 
368   for (char character : input) {
369     if (character == '_') {
370       capitalize_next = true;
371     } else if (capitalize_next) {
372       result.push_back(ToUpper(character));
373       capitalize_next = false;
374     } else {
375       result.push_back(character);
376     }
377   }
378 
379   // Lower-case the first letter.
380   if (lower_first && !result.empty()) {
381     result[0] = ToLower(result[0]);
382   }
383 
384   return result;
385 }
386 
ToJsonName(const std::string & input)387 std::string ToJsonName(const std::string& input) {
388   bool capitalize_next = false;
389   std::string result;
390   result.reserve(input.size());
391 
392   for (char character : input) {
393     if (character == '_') {
394       capitalize_next = true;
395     } else if (capitalize_next) {
396       result.push_back(ToUpper(character));
397       capitalize_next = false;
398     } else {
399       result.push_back(character);
400     }
401   }
402 
403   return result;
404 }
405 
EnumValueToPascalCase(const std::string & input)406 std::string EnumValueToPascalCase(const std::string& input) {
407   bool next_upper = true;
408   std::string result;
409   result.reserve(input.size());
410 
411   for (char character : input) {
412     if (character == '_') {
413       next_upper = true;
414     } else {
415       if (next_upper) {
416         result.push_back(ToUpper(character));
417       } else {
418         result.push_back(ToLower(character));
419       }
420       next_upper = false;
421     }
422   }
423 
424   return result;
425 }
426 
427 // Class to remove an enum prefix from enum values.
428 class PrefixRemover {
429  public:
PrefixRemover(StringPiece prefix)430   PrefixRemover(StringPiece prefix) {
431     // Strip underscores and lower-case the prefix.
432     for (char character : prefix) {
433       if (character != '_') {
434         prefix_ += ascii_tolower(character);
435       }
436     }
437   }
438 
439   // Tries to remove the enum prefix from this enum value.
440   // If this is not possible, returns the input verbatim.
MaybeRemove(StringPiece str)441   std::string MaybeRemove(StringPiece str) {
442     // We can't just lowercase and strip str and look for a prefix.
443     // We need to properly recognize the difference between:
444     //
445     //   enum Foo {
446     //     FOO_BAR_BAZ = 0;
447     //     FOO_BARBAZ = 1;
448     //   }
449     //
450     // This is acceptable (though perhaps not advisable) because even when
451     // we PascalCase, these two will still be distinct (BarBaz vs. Barbaz).
452     size_t i, j;
453 
454     // Skip past prefix_ in str if we can.
455     for (i = 0, j = 0; i < str.size() && j < prefix_.size(); i++) {
456       if (str[i] == '_') {
457         continue;
458       }
459 
460       if (ascii_tolower(str[i]) != prefix_[j++]) {
461         return std::string(str);
462       }
463     }
464 
465     // If we didn't make it through the prefix, we've failed to strip the
466     // prefix.
467     if (j < prefix_.size()) {
468       return std::string(str);
469     }
470 
471     // Skip underscores between prefix and further characters.
472     while (i < str.size() && str[i] == '_') {
473       i++;
474     }
475 
476     // Enum label can't be the empty string.
477     if (i == str.size()) {
478       return std::string(str);
479     }
480 
481     // We successfully stripped the prefix.
482     str.remove_prefix(i);
483     return std::string(str);
484   }
485 
486  private:
487   std::string prefix_;
488 };
489 
490 // A DescriptorPool contains a bunch of hash-maps to implement the
491 // various Find*By*() methods.  Since hashtable lookups are O(1), it's
492 // most efficient to construct a fixed set of large hash-maps used by
493 // all objects in the pool rather than construct one or more small
494 // hash-maps for each object.
495 //
496 // The keys to these hash-maps are (parent, name) or (parent, number) pairs.
497 
498 typedef std::pair<const void*, StringPiece> PointerStringPair;
499 
500 typedef std::pair<const Descriptor*, int> DescriptorIntPair;
501 typedef std::pair<const EnumDescriptor*, int> EnumIntPair;
502 
503 #define HASH_MAP std::unordered_map
504 #define HASH_SET std::unordered_set
505 #define HASH_FXN hash
506 
// Hash functor for (pointer, integer) pairs. It also provides a two-argument
// call operator implementing a strict "less than" ordering on the pairs.
template <typename PairType>
struct PointerIntegerPairHash {
  // Multiplies the pointer bits and the integer by two different primes and
  // XORs the two products.
  size_t operator()(const PairType& p) const {
    static const size_t prime1 = 16777499;
    static const size_t prime2 = 16777619;
    const size_t pointer_part = reinterpret_cast<size_t>(p.first) * prime1;
    const size_t number_part = static_cast<size_t>(p.second) * prime2;
    return pointer_part ^ number_part;
  }

#ifdef _MSC_VER
  // Used only by MSVC and platforms where hash_map is not available.
  static const size_t bucket_size = 4;
  static const size_t min_buckets = 8;
#endif
  // Ordering predicate: true when `a` sorts before `b`.
  inline bool operator()(const PairType& a, const PairType& b) const {
    return a < b;
  }
};
525 
526 struct PointerStringPairHash {
operator ()google::protobuf::__anonc1b606e10211::PointerStringPairHash527   size_t operator()(const PointerStringPair& p) const {
528     static const size_t prime = 16777619;
529     hash<StringPiece> string_hash;
530     return reinterpret_cast<size_t>(p.first) * prime ^
531            static_cast<size_t>(string_hash(p.second));
532   }
533 
534 #ifdef _MSC_VER
535   // Used only by MSVC and platforms where hash_map is not available.
536   static const size_t bucket_size = 4;
537   static const size_t min_buckets = 8;
538 #endif
operator ()google::protobuf::__anonc1b606e10211::PointerStringPairHash539   inline bool operator()(const PointerStringPair& a,
540                          const PointerStringPair& b) const {
541     return a < b;
542   }
543 };
544 
545 
546 const Symbol kNullSymbol;
547 
548 struct SymbolByFullNameHash {
operator ()google::protobuf::__anonc1b606e10211::SymbolByFullNameHash549   size_t operator()(Symbol s) const {
550     return HASH_FXN<StringPiece>{}(s.full_name());
551   }
552 };
553 struct SymbolByFullNameEq {
operator ()google::protobuf::__anonc1b606e10211::SymbolByFullNameEq554   bool operator()(Symbol a, Symbol b) const {
555     return a.full_name() == b.full_name();
556   }
557 };
558 using SymbolsByNameSet =
559     HASH_SET<Symbol, SymbolByFullNameHash, SymbolByFullNameEq>;
560 
561 struct SymbolByParentHash {
operator ()google::protobuf::__anonc1b606e10211::SymbolByParentHash562   size_t operator()(Symbol s) const {
563     return PointerStringPairHash{}(s.parent_key());
564   }
565 };
566 struct SymbolByParentEq {
operator ()google::protobuf::__anonc1b606e10211::SymbolByParentEq567   bool operator()(Symbol a, Symbol b) const {
568     return a.parent_key() == b.parent_key();
569   }
570 };
571 using SymbolsByParentSet =
572     HASH_SET<Symbol, SymbolByParentHash, SymbolByParentEq>;
573 
574 typedef HASH_MAP<StringPiece, const FileDescriptor*,
575                  HASH_FXN<StringPiece>>
576     FilesByNameMap;
577 
578 typedef HASH_MAP<PointerStringPair, const FieldDescriptor*,
579                  PointerStringPairHash>
580     FieldsByNameMap;
581 
582 typedef HASH_MAP<DescriptorIntPair, const FieldDescriptor*,
583                  PointerIntegerPairHash<DescriptorIntPair>,
584                  std::equal_to<DescriptorIntPair>>
585     FieldsByNumberMap;
586 
587 typedef HASH_MAP<EnumIntPair, const EnumValueDescriptor*,
588                  PointerIntegerPairHash<EnumIntPair>,
589                  std::equal_to<EnumIntPair>>
590     EnumValuesByNumberMap;
591 // This is a map rather than a hash-map, since we use it to iterate
592 // through all the extensions that extend a given Descriptor, and an
593 // ordered data structure that implements lower_bound is convenient
594 // for that.
595 typedef std::map<DescriptorIntPair, const FieldDescriptor*>
596     ExtensionsGroupedByDescriptorMap;
597 typedef HASH_MAP<std::string, const SourceCodeInfo_Location*>
598     LocationsByPathMap;
599 
// Builds a heap-allocated set holding the full names of the descriptor option
// messages, each under two package spellings. The caller owns the returned
// set.
std::set<std::string>* NewAllowedProto3Extendee() {
  const char* kOptionNames[] = {
      "FileOptions",      "MessageOptions", "FieldOptions",  "EnumOptions",
      "EnumValueOptions", "ServiceOptions", "MethodOptions", "OneofOptions"};

  auto* extendees = new std::set<std::string>;
  for (const char* option_name : kOptionNames) {
    // descriptor.proto has a different package name in opensource. We allow
    // both so the opensource protocol compiler can also compile internal
    // proto3 files with custom options. See: b/27567912
    std::string opensource_name = std::string("google.protobuf.") + option_name;
    // Split the word to trick the opensource processing scripts so they
    // will keep the original package name.
    std::string internal_name = std::string("proto") + "2." + option_name;

    extendees->insert(opensource_name);
    extendees->insert(internal_name);
  }
  return extendees;
}
617 
618 // Checks whether the extendee type is allowed in proto3.
619 // Only extensions to descriptor options are allowed. We use name comparison
620 // instead of comparing the descriptor directly because the extensions may be
621 // defined in a different pool.
AllowedExtendeeInProto3(const std::string & name)622 bool AllowedExtendeeInProto3(const std::string& name) {
623   static auto allowed_proto3_extendees =
624       internal::OnShutdownDelete(NewAllowedProto3Extendee());
625   return allowed_proto3_extendees->find(name) !=
626          allowed_proto3_extendees->end();
627 }
628 
629 // This bump allocator arena is optimized for the use case of this file. It is
630 // mostly optimized for memory usage, since these objects are expected to live
631 // for the entirety of the program.
632 //
633 // Some differences from other arenas:
634 //  - It has a fixed number of non-trivial types it can hold. This allows
635 //    tracking the allocations with a single byte. In contrast, google::protobuf::Arena
636 //    uses 16 bytes per non-trivial object created.
637 //  - It has some extra metadata for rollbacks. This is necessary for
638 //    implementing the API below. This metadata is flushed at the end and would
639 //    not cause persistent memory usage.
//  - It tries to squeeze every byte out of the blocks. If an allocation is too
641 //    large for the current block we move the block to a secondary area where we
642 //    can still use it for smaller objects. This complicates rollback logic but
643 //    makes it much more memory efficient.
644 //
645 //  The allocation strategy is as follows:
646 //   - Memory is allocated from the front, with a forced 8 byte alignment.
647 //   - Metadata is allocated from the back, one byte per element.
648 //   - The metadata encodes one of two things:
649 //     * For types we want to track, the index into KnownTypes.
650 //     * For raw memory blocks, the size of the block (in 8 byte increments
651 //       to allow for a larger limit).
652 //   - When the raw data is too large to represent in the metadata byte, we
653 //     allocate this memory separately in the heap and store an OutOfLineAlloc
//     object instead. These come from large array allocations and the like.
655 //
656 //  Blocks are kept in 3 areas:
657 //   - `current_` is the one we are currently allocating from. When we need to
658 //     allocate a block that doesn't fit there, we make a new block and move the
659 //     old `current_` to one of the areas below.
660 //   - Blocks that have no more usable space left (ie less than 9 bytes) are
661 //     stored in `full_blocks_`.
662 //   - Blocks that have some usable space are categorized in
663 //     `small_size_blocks_` depending on how much space they have left.
664 //     See `kSmallSizes` to see which sizes we track.
665 //
666 class TableArena {
667  public:
  // Allocate a block of `n` bytes, with no destructor information saved.
AllocateMemory(uint32_t n)669   void* AllocateMemory(uint32_t n) {
670     uint32_t tag = SizeToRawTag(n) + kFirstRawTag;
671     if (tag > 255) {
672       // We can't fit the size, use an OutOfLineAlloc.
673       return Create<OutOfLineAlloc>(OutOfLineAlloc{::operator new(n), n})->ptr;
674     }
675 
676     return AllocRawInternal(n, static_cast<Tag>(tag));
677   }
678 
679   // Allocate and construct an element of type `T` as if by
  // `T(std::forward<Args>(args)...)`.
681   // The object is registered for destruction, if its destructor is not trivial.
682   template <typename T, typename... Args>
683   T* Create(Args&&... args) {
684     static_assert(alignof(T) <= 8, "");
685     return ::new (AllocRawInternal(sizeof(T), TypeTag<T>(KnownTypes{})))
686         T(std::forward<Args>(args)...);
687   }
688 
TableArena()689   TableArena() {}
690 
691   TableArena(const TableArena&) = delete;
692   TableArena& operator=(const TableArena&) = delete;
693 
~TableArena()694   ~TableArena() {
695     // Uncomment this to debug usage statistics of the arena blocks.
696     // PrintUsageInfo();
697 
698     for (Block* list : GetLists()) {
699       while (list != nullptr) {
700         Block* b = list;
701         list = list->next;
702         b->VisitBlock(DestroyVisitor{});
703         b->Destroy();
704       }
705     }
706   }
707 
708 
709   // This function exists for debugging only.
710   // It can be called from the destructor to dump some info in the tests to
711   // inspect the usage of the arena.
PrintUsageInfo() const712   void PrintUsageInfo() const {
713     const auto print_histogram = [](Block* b, int size) {
714       std::map<uint32_t, uint32_t> unused_space_count;
715       int count = 0;
716       for (; b != nullptr; b = b->next) {
717         ++unused_space_count[b->space_left()];
718         ++count;
719       }
720       if (size > 0) {
721         fprintf(stderr, "  Blocks `At least %d`", size);
722       } else {
723         fprintf(stderr, "  Blocks `full`");
724       }
725       fprintf(stderr, ": %d blocks.\n", count);
726       for (auto p : unused_space_count) {
727         fprintf(stderr, "    space=%4u, count=%3u\n", p.first, p.second);
728       }
729     };
730 
731     fprintf(stderr, "TableArena unused space histogram:\n");
732     fprintf(stderr, "  Current: %u\n",
733             current_ != nullptr ? current_->space_left() : 0);
734     print_histogram(full_blocks_, 0);
735     for (size_t i = 0; i < kSmallSizes.size(); ++i) {
736       print_histogram(small_size_blocks_[i], kSmallSizes[i]);
737     }
738   }
739 
740   // Current allocation count.
741   // This can be used for checkpointing.
num_allocations() const742   size_t num_allocations() const { return num_allocations_; }
743 
744   // Rollback the latest allocations until we reach back to `checkpoint`
745   // num_allocations.
RollbackTo(size_t checkpoint)746   void RollbackTo(size_t checkpoint) {
747     while (num_allocations_ > checkpoint) {
748       GOOGLE_DCHECK(!rollback_info_.empty());
749       auto& info = rollback_info_.back();
750       Block* b = info.block;
751 
752       VisitAlloc(b->data(), &b->start_offset, &b->end_offset, DestroyVisitor{},
753                  KnownTypes{});
754       if (--info.count == 0) {
755         rollback_info_.pop_back();
756       }
757       --num_allocations_;
758     }
759 
760     // Reconstruct the lists and destroy empty blocks.
761     auto lists = GetLists();
762     current_ = full_blocks_ = nullptr;
763     small_size_blocks_.fill(nullptr);
764 
765     for (Block* list : lists) {
766       while (list != nullptr) {
767         Block* b = list;
768         list = list->next;
769 
770         if (b->start_offset == 0) {
771           // This is empty, free it.
772           b->Destroy();
773         } else {
774           RelocateToUsedList(b);
775         }
776       }
777     }
778   }
779 
780   // Clear all rollback information. Reduces memory usage.
781   // Trying to rollback past num_allocations() is now impossible.
ClearRollbackData()782   void ClearRollbackData() {
783     rollback_info_.clear();
784     rollback_info_.shrink_to_fit();
785   }
786 
787  private:
RoundUp(size_t n)788   static constexpr size_t RoundUp(size_t n) { return (n + 7) & ~7; }
789 
790   using Tag = unsigned char;
791 
AllocRawInternal(uint32_t size,Tag tag)792   void* AllocRawInternal(uint32_t size, Tag tag) {
793     GOOGLE_DCHECK_GT(size, 0);
794     size = RoundUp(size);
795 
796     Block* to_relocate = nullptr;
797     Block* to_use;
798 
799     for (size_t i = 0; i < kSmallSizes.size(); ++i) {
800       if (small_size_blocks_[i] != nullptr && size <= kSmallSizes[i]) {
801         to_use = to_relocate = PopBlock(small_size_blocks_[i]);
802         break;
803       }
804     }
805 
806     if (to_relocate != nullptr) {
807       // We found one in the loop.
808     } else if (current_ != nullptr && size + 1 <= current_->space_left()) {
809       to_use = current_;
810     } else {
811       // No space left anywhere, make a new block.
812       to_relocate = current_;
813       // For now we hardcode the size to one page. Note that the maximum we can
814       // allocate in the block according to the limits of Tag is less than 2k,
815       // so this can fit anything that Tag can represent.
816       constexpr size_t kBlockSize = 4096;
817       to_use = current_ = ::new (::operator new(kBlockSize)) Block(kBlockSize);
818       GOOGLE_DCHECK_GE(current_->space_left(), size + 1);
819     }
820 
821     ++num_allocations_;
822     if (!rollback_info_.empty() && rollback_info_.back().block == to_use) {
823       ++rollback_info_.back().count;
824     } else {
825       rollback_info_.push_back({to_use, 1});
826     }
827 
828     void* p = to_use->Allocate(size, tag);
829     if (to_relocate != nullptr) {
830       RelocateToUsedList(to_relocate);
831     }
832     return p;
833   }
834 
// Releases memory obtained from ::operator new. The sized form of
// ::operator delete is used when the compiler advertises support for it;
// otherwise `s` is ignored.
static void OperatorDelete(void* p, size_t s) {
#if !defined(__GXX_DELETE_WITH_SIZE__) && !defined(__cpp_sized_deallocation)
  ::operator delete(p);
#else
  ::operator delete(p, s);
#endif
}
842 
// Record of a buffer that lives outside the arena's blocks. When a
// DestroyVisitor visits the record, `ptr` is released through
// OperatorDelete together with `size`.
struct OutOfLineAlloc {
  void* ptr;      // Start of the separately allocated buffer.
  uint32_t size;  // Byte size handed to OperatorDelete for `ptr`.
};
847 
// Compile-time list of types. It carries no data; it is passed by value to
// select overloads and to expand the pack.
template <typename... T>
struct TypeList {
  // Number of types in the list, narrowed to a Tag.
  static constexpr Tag kSize = static_cast<Tag>(sizeof...(T));
};
852 
853   template <typename T, typename Visitor>
RunVisitor(char * p,uint16_t * start,Visitor visit)854   static void RunVisitor(char* p, uint16_t* start, Visitor visit) {
855     *start -= RoundUp(sizeof(T));
856     visit(reinterpret_cast<T*>(p + *start));
857   }
858 
859   // Visit the allocation at the passed location.
860   // It updates start/end to be after the visited object.
861   // This allows visiting a whole block by calling the function in a loop.
862   template <typename Visitor, typename... T>
VisitAlloc(char * p,uint16_t * start,uint16_t * end,Visitor visit,TypeList<T...>)863   static void VisitAlloc(char* p, uint16_t* start, uint16_t* end, Visitor visit,
864                          TypeList<T...>) {
865     const Tag tag = static_cast<Tag>(p[*end]);
866     if (tag >= kFirstRawTag) {
867       // Raw memory. Skip it.
868       *start -= TagToSize(tag);
869     } else {
870       using F = void (*)(char*, uint16_t*, Visitor);
871       static constexpr F kFuncs[] = {&RunVisitor<T, Visitor>...};
872       kFuncs[tag](p, start, visit);
873     }
874     ++*end;
875   }
876 
// Base case of the tag lookup: U is the head of the list, so its position
// relative to this list is 0.
template <typename U, typename... Ts>
static constexpr Tag TypeTag(TypeList<U, Ts...>) {
  return 0;
}
881 
// Recursive case of the tag lookup: the head T is not U (enforced by the
// enable_if), so search the tail and add one for the skipped head.
template <
    typename U, typename T, typename... Ts,
    typename = typename std::enable_if<!std::is_same<U, T>::value>::type>
static constexpr Tag TypeTag(TypeList<T, Ts...>) {
  return 1 + TypeTag<U>(TypeList<Ts...>{});
}
888 
// Terminal case of the tag lookup: the list is exhausted, so U is not one of
// the listed types. Only trivially destructible types are accepted here, and
// the value returned is derived from sizeof(U) through SizeToRawTag().
template <typename U>
static constexpr Tag TypeTag(TypeList<>) {
  static_assert(std::is_trivially_destructible<U>::value, "");
  return SizeToRawTag(sizeof(U));
}
894 
// Types that receive a dedicated tag. TypeTag() yields a listed type's index
// in this list, and VisitAlloc() uses that index to pick the matching
// RunVisitor instantiation.
using KnownTypes =
    TypeList<OutOfLineAlloc, std::string,
             // For name arrays
             std::array<std::string, 2>, std::array<std::string, 3>,
             std::array<std::string, 4>, std::array<std::string, 5>,
             FileDescriptorTables, SourceCodeInfo, FileOptions,
             MessageOptions, FieldOptions, ExtensionRangeOptions,
             OneofOptions, EnumOptions, EnumValueOptions, ServiceOptions,
             MethodOptions>;
// Tags at or above this value denote raw memory; VisitAlloc() skips such
// allocations using TagToSize().
static constexpr Tag kFirstRawTag = KnownTypes::kSize;
905 
906 
// Visitor that tears down the allocation it is handed.
struct DestroyVisitor {
  // Generic case: run the object's destructor in place.
  template <typename T>
  void operator()(T* p) {
    p->~T();
  }
  // OutOfLineAlloc records refer to a separate buffer; release that buffer
  // with the size stored in the record.
  void operator()(OutOfLineAlloc* p) { OperatorDelete(p->ptr, p->size); }
};
914 
SizeToRawTag(size_t n)915   static uint32_t SizeToRawTag(size_t n) { return (RoundUp(n) / 8) - 1; }
916 
TagToSize(Tag tag)917   static uint32_t TagToSize(Tag tag) {
918     GOOGLE_DCHECK_GE(tag, kFirstRawTag);
919     return static_cast<uint32_t>(tag - kFirstRawTag + 1) * 8;
920   }
921 
922   struct Block {
923     uint16_t start_offset;
924     uint16_t end_offset;
925     uint16_t capacity;
926     Block* next;
927 
928     // `allocated_size` is the total size of the memory block allocated.
929     // The `Block` structure is constructed at the start and the rest of the
930     // memory is used as the payload of the `Block`.
Blockgoogle::protobuf::__anonc1b606e10211::TableArena::Block931     explicit Block(uint32_t allocated_size) {
932       start_offset = 0;
933       end_offset = capacity =
934           reinterpret_cast<char*>(this) + allocated_size - data();
935       next = nullptr;
936     }
937 
datagoogle::protobuf::__anonc1b606e10211::TableArena::Block938     char* data() {
939       return reinterpret_cast<char*>(this) + RoundUp(sizeof(Block));
940     }
941 
memory_usedgoogle::protobuf::__anonc1b606e10211::TableArena::Block942     uint32_t memory_used() {
943       return data() + capacity - reinterpret_cast<char*>(this);
944     }
space_leftgoogle::protobuf::__anonc1b606e10211::TableArena::Block945     uint32_t space_left() const { return end_offset - start_offset; }
946 
Allocategoogle::protobuf::__anonc1b606e10211::TableArena::Block947     void* Allocate(uint32_t n, Tag tag) {
948       GOOGLE_DCHECK_LE(n + 1, space_left());
949       void* p = data() + start_offset;
950       start_offset += n;
951       data()[--end_offset] = tag;
952       return p;
953     }
954 
Destroygoogle::protobuf::__anonc1b606e10211::TableArena::Block955     void Destroy() { OperatorDelete(this, memory_used()); }
956 
PrependTogoogle::protobuf::__anonc1b606e10211::TableArena::Block957     void PrependTo(Block*& list) {
958       next = list;
959       list = this;
960     }
961 
962     template <typename Visitor>
VisitBlockgoogle::protobuf::__anonc1b606e10211::TableArena::Block963     void VisitBlock(Visitor visit) {
964       for (uint16_t s = start_offset, e = end_offset; s != 0;) {
965         VisitAlloc(data(), &s, &e, visit, KnownTypes{});
966       }
967     }
968   };
969 
PopBlock(Block * & list)970   Block* PopBlock(Block*& list) {
971     Block* res = list;
972     list = list->next;
973     return res;
974   }
975 
RelocateToUsedList(Block * to_relocate)976   void RelocateToUsedList(Block* to_relocate) {
977     if (current_ == nullptr) {
978       current_ = to_relocate;
979       current_->next = nullptr;
980       return;
981     } else if (current_->space_left() < to_relocate->space_left()) {
982       std::swap(current_, to_relocate);
983       current_->next = nullptr;
984     }
985 
986     for (int i = kSmallSizes.size(); --i >= 0;) {
987       if (to_relocate->space_left() >= 1 + kSmallSizes[i]) {
988         to_relocate->PrependTo(small_size_blocks_[i]);
989         return;
990       }
991     }
992 
993     to_relocate->PrependTo(full_blocks_);
994   }
995 
// Size classes, in bytes, that index `small_size_blocks_`: a block filed
// under class i has room for kSmallSizes[i] bytes plus one tag byte.
static constexpr std::array<uint8_t, 6> kSmallSizes = {{
    // Sizes for pointer arrays.
    8, 16, 24, 32,
    // Sizes for string arrays (for descriptor names).
    // The most common array sizes are 2 and 3.
    2 * sizeof(std::string), 3 * sizeof(std::string)}};
1002 
1003   // Helper function to iterate all lists.
GetLists() const1004   std::array<Block*, 2 + kSmallSizes.size()> GetLists() const {
1005     std::array<Block*, 2 + kSmallSizes.size()> res;
1006     res[0] = current_;
1007     res[1] = full_blocks_;
1008     std::copy(small_size_blocks_.begin(), small_size_blocks_.end(), &res[2]);
1009     return res;
1010   }
1011 
// Block tried by AllocRawInternal() when no small-size list can serve the
// request; replaced by a fresh block when it has no room.
Block* current_ = nullptr;
// One list head per entry of kSmallSizes (see RelocateToUsedList()).
std::array<Block*, kSmallSizes.size()> small_size_blocks_ = {{}};
// Blocks without enough room left for even the smallest size class.
Block* full_blocks_ = nullptr;

// Incremented once per AllocRawInternal() call.
size_t num_allocations_ = 0;
// Run-length record of allocations: `count` consecutive allocations were
// served from `block`.
struct RollbackInfo {
  Block* block;
  size_t count;
};
std::vector<RollbackInfo> rollback_info_;
1022 };
1023 
// Out-of-class definition of the static constexpr member declared (with its
// initializer) inside TableArena.
constexpr std::array<uint8_t, 6> TableArena::kSmallSizes;
1025 
1026 }  // anonymous namespace
1027 
1028 // ===================================================================
1029 // DescriptorPool::Tables
1030 
1031 class DescriptorPool::Tables {
1032  public:
1033   Tables();
1034   ~Tables();
1035 
1036   // Record the current state of the tables to the stack of checkpoints.
1037   // Each call to AddCheckpoint() must be paired with exactly one call to either
1038   // ClearLastCheckpoint() or RollbackToLastCheckpoint().
1039   //
1040   // This is used when building files, since some kinds of validation errors
1041   // cannot be detected until the file's descriptors have already been added to
1042   // the tables.
1043   //
1044   // This supports recursive checkpoints, since building a file may trigger
1045   // recursive building of other files. Note that recursive checkpoints are not
1046   // normally necessary; explicit dependencies are built prior to checkpointing.
1047   // So although we recursively build transitive imports, there is at most one
1048   // checkpoint in the stack during dependency building.
1049   //
1050   // Recursive checkpoints only arise during cross-linking of the descriptors.
1051   // Symbol references must be resolved, via DescriptorBuilder::FindSymbol and
1052   // friends. If the pending file references an unknown symbol
1053   // (e.g., it is not defined in the pending file's explicit dependencies), and
1054   // the pool is using a fallback database, and that database contains a file
1055   // defining that symbol, and that file has not yet been built by the pool,
1056   // the pool builds the file during cross-linking, leading to another
1057   // checkpoint.
1058   void AddCheckpoint();
1059 
1060   // Mark the last checkpoint as having cleared successfully, removing it from
1061   // the stack. If the stack is empty, all pending symbols will be committed.
1062   //
1063   // Note that this does not guarantee that the symbols added since the last
1064   // checkpoint won't be rolled back: if a checkpoint gets rolled back,
1065   // everything past that point gets rolled back, including symbols added after
1066   // checkpoints that were pushed onto the stack after it and marked as cleared.
1067   void ClearLastCheckpoint();
1068 
1069   // Roll back the Tables to the state of the checkpoint at the top of the
1070   // stack, removing everything that was added after that point.
1071   void RollbackToLastCheckpoint();
1072 
1073   // The stack of files which are currently being built.  Used to detect
1074   // cyclic dependencies when loading files from a DescriptorDatabase.  Not
1075   // used when fallback_database_ == nullptr.
1076   std::vector<std::string> pending_files_;
1077 
1078   // A set of files which we have tried to load from the fallback database
1079   // and encountered errors.  We will not attempt to load them again during
1080   // execution of the current public API call, but for compatibility with
1081   // legacy clients, this is cleared at the beginning of each public API call.
1082   // Not used when fallback_database_ == nullptr.
1083   HASH_SET<std::string> known_bad_files_;
1084 
1085   // A set of symbols which we have tried to load from the fallback database
1086   // and encountered errors. We will not attempt to load them again during
1087   // execution of the current public API call, but for compatibility with
1088   // legacy clients, this is cleared at the beginning of each public API call.
1089   HASH_SET<std::string> known_bad_symbols_;
1090 
1091   // The set of descriptors for which we've already loaded the full
1092   // set of extensions numbers from fallback_database_.
1093   HASH_SET<const Descriptor*> extensions_loaded_from_db_;
1094 
1095   // Maps type name to Descriptor::WellKnownType.  This is logically global
1096   // and const, but we make it a member here to simplify its construction and
1097   // destruction.  This only has 20-ish entries and is one per DescriptorPool,
1098   // so the overhead is small.
1099   HASH_MAP<std::string, Descriptor::WellKnownType> well_known_types_;
1100 
1101   // -----------------------------------------------------------------
1102   // Finding items.
1103 
1104   // Find symbols.  This returns a null Symbol (symbol.IsNull() is true)
1105   // if not found.
1106   inline Symbol FindSymbol(StringPiece key) const;
1107 
1108   // This implements the body of DescriptorPool::Find*ByName().  It should
1109   // really be a private method of DescriptorPool, but that would require
1110   // declaring Symbol in descriptor.h, which would drag all kinds of other
1111   // stuff into the header.  Yay C++.
1112   Symbol FindByNameHelper(const DescriptorPool* pool, StringPiece name);
1113 
1114   // These return nullptr if not found.
1115   inline const FileDescriptor* FindFile(StringPiece key) const;
1116   inline const FieldDescriptor* FindExtension(const Descriptor* extendee,
1117                                               int number) const;
1118   inline void FindAllExtensions(const Descriptor* extendee,
1119                                 std::vector<const FieldDescriptor*>* out) const;
1120 
1121   // -----------------------------------------------------------------
1122   // Adding items.
1123 
1124   // These add items to the corresponding tables.  They return false if
1125   // the key already exists in the table.  For AddSymbol(), the string passed
1126   // in must be one that was constructed using AllocateString(), as it will
1127   // be used as a key in the symbols_by_name_ map without copying.
1128   bool AddSymbol(const std::string& full_name, Symbol symbol);
1129   bool AddFile(const FileDescriptor* file);
1130   bool AddExtension(const FieldDescriptor* field);
1131 
1132   // -----------------------------------------------------------------
1133   // Allocating memory.
1134 
1135   // Allocate an object which will be reclaimed when the pool is
1136   // destroyed.  Note that the object's destructor will never be called,
1137   // so its fields must be plain old data (primitive data types and
1138   // pointers).  All of the descriptor types are such objects.
1139   template <typename Type>
1140   Type* Allocate();
1141 
1142   // Allocate an array of objects which will be reclaimed when the
1143   // pool in destroyed.  Again, destructors are never called.
1144   template <typename Type>
1145   Type* AllocateArray(int count);
1146 
1147   // Allocate a string which will be destroyed when the pool is destroyed.
1148   // The string is initialized to the given value for convenience.
1149   const std::string* AllocateString(StringPiece value);
1150 
1151   // Allocates an array of strings which will be destroyed when the pool is
1152   // destroyed. The array is initialized with the input values.
1153   template <typename... In>
1154   const std::string* AllocateStringArray(In&&... values);
1155 
1156   struct FieldNamesResult {
1157     std::string* array;
1158     int lowercase_index;
1159     int camelcase_index;
1160     int json_index;
1161   };
1162   // Allocate all 5 names of the field:
1163   // name, full name, lowercase, camelcase and json.
1164   // This function will dedup the strings when possible.
1165   // The resulting array contains `name` at index 0, `full_name` at index 1 and
1166   // the other 3 indices are specified in the result.
1167   FieldNamesResult AllocateFieldNames(const std::string& name,
1168                                       const std::string& scope,
1169                                       const std::string* opt_json_name);
1170 
1171   // Allocate a LazyInitData which will be destroyed when the pool is
1172   // destroyed.
1173   internal::LazyInitData* AllocateLazyInit();
1174 
1175   // Allocate a protocol message object.  Some older versions of GCC have
1176   // trouble understanding explicit template instantiations in some cases, so
1177   // in those cases we have to pass a dummy pointer of the right type as the
1178   // parameter instead of specifying the type explicitly.
1179   template <typename Type>
1180   Type* AllocateMessage(Type* dummy = nullptr);
1181 
1182   // Allocate a FileDescriptorTables object.
1183   FileDescriptorTables* AllocateFileTables();
1184 
1185  private:
1186   // All other memory allocated in the pool.  Must be first as other objects can
1187   // point into these.
1188   TableArena arena_;
1189 
1190   SymbolsByNameSet symbols_by_name_;
1191   FilesByNameMap files_by_name_;
1192   ExtensionsGroupedByDescriptorMap extensions_;
1193 
1194   struct CheckPoint {
CheckPointgoogle::protobuf::DescriptorPool::Tables::CheckPoint1195     explicit CheckPoint(const Tables* tables)
1196         : arena_before_checkpoint(tables->arena_.num_allocations()),
1197           pending_symbols_before_checkpoint(
1198               tables->symbols_after_checkpoint_.size()),
1199           pending_files_before_checkpoint(
1200               tables->files_after_checkpoint_.size()),
1201           pending_extensions_before_checkpoint(
1202               tables->extensions_after_checkpoint_.size()) {}
1203     int arena_before_checkpoint;
1204     int pending_symbols_before_checkpoint;
1205     int pending_files_before_checkpoint;
1206     int pending_extensions_before_checkpoint;
1207   };
1208   std::vector<CheckPoint> checkpoints_;
1209   std::vector<const char*> symbols_after_checkpoint_;
1210   std::vector<const char*> files_after_checkpoint_;
1211   std::vector<DescriptorIntPair> extensions_after_checkpoint_;
1212 
1213   // Allocate some bytes which will be reclaimed when the pool is
1214   // destroyed.
1215   void* AllocateBytes(int size);
1216 };
1217 
// Contains tables specific to a particular file.  These tables are not
// modified once the file has been constructed, so they need not be
// protected by a mutex.  This makes operations that depend only on the
// contents of a single file -- e.g. Descriptor::FindFieldByName() --
// lock-free.
//
// For historical reasons, the definitions of the methods of
// FileDescriptorTables and DescriptorPool::Tables are interleaved below.
// These used to be a single class.
class FileDescriptorTables {
 public:
  FileDescriptorTables();
  ~FileDescriptorTables();

  // Empty table, used with placeholder files.
  inline static const FileDescriptorTables& GetEmptyInstance();

  // -----------------------------------------------------------------
  // Finding items.

  // Returns a null Symbol (symbol.IsNull() is true) if not found.
  inline Symbol FindNestedSymbol(const void* parent,
                                 StringPiece name) const;

  // These return nullptr if not found.
  inline const FieldDescriptor* FindFieldByNumber(const Descriptor* parent,
                                                  int number) const;
  inline const FieldDescriptor* FindFieldByLowercaseName(
      const void* parent, StringPiece lowercase_name) const;
  inline const FieldDescriptor* FindFieldByCamelcaseName(
      const void* parent, StringPiece camelcase_name) const;
  inline const EnumValueDescriptor* FindEnumValueByNumber(
      const EnumDescriptor* parent, int number) const;
  // This creates a new EnumValueDescriptor if not found, in a thread-safe way.
  inline const EnumValueDescriptor* FindEnumValueByNumberCreatingIfUnknown(
      const EnumDescriptor* parent, int number) const;

  // -----------------------------------------------------------------
  // Adding items.

  // These add items to the corresponding tables.  They return false if
  // the key already exists in the table.  For AddAliasUnderParent(), the
  // string passed in must be one that was constructed using AllocateString(),
  // as it will be used as a key in the symbols_by_parent_ map without copying.
  bool AddAliasUnderParent(const void* parent, const std::string& name,
                           Symbol symbol);
  bool AddFieldByNumber(const FieldDescriptor* field);
  bool AddEnumValueByNumber(const EnumValueDescriptor* value);

  // Adds the field to the lowercase_name and camelcase_name maps.  Never
  // fails because we allow duplicates; the first field by the name wins.
  void AddFieldByStylizedNames(const FieldDescriptor* field);

  // Populates p->first->locations_by_path_ from p->second.
  // Unusual signature dictated by internal::call_once.
  static void BuildLocationsByPath(
      std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p);

  // Returns the location denoted by the specified path through info,
  // or nullptr if not found.
  // The value of info must be that of the corresponding FileDescriptor.
  // (Conceptually a pure function, but stateful as an optimisation.)
  const SourceCodeInfo_Location* GetSourceLocation(
      const std::vector<int>& path, const SourceCodeInfo* info) const;

  // Must be called after BuildFileImpl(), even if the build failed and
  // we are going to roll back to the last checkpoint.
  void FinalizeTables();

 private:
  // Returns the parent pointer under which `field` is keyed in the by-name
  // maps: the containing type for regular fields; for extensions, the
  // extension scope or, when there is none, the file.
  const void* FindParentForFieldsByMap(const FieldDescriptor* field) const;
  // The *LazyInitStatic functions are the callables handed to
  // internal::call_once; each forwards to the matching *LazyInitInternal
  // member, which fills the corresponding by-name map from fields_by_number_.
  static void FieldsByLowercaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByLowercaseNamesLazyInitInternal() const;
  static void FieldsByCamelcaseNamesLazyInitStatic(
      const FileDescriptorTables* tables);
  void FieldsByCamelcaseNamesLazyInitInternal() const;

  SymbolsByParentSet symbols_by_parent_;
  // Filled on the first FindFieldByLowercaseName() call, guarded by
  // fields_by_lowercase_name_once_ (hence mutable).
  mutable FieldsByNameMap fields_by_lowercase_name_;
  std::unique_ptr<FieldsByNameMap> fields_by_lowercase_name_tmp_;
  mutable internal::once_flag fields_by_lowercase_name_once_;
  // Filled on the first FindFieldByCamelcaseName() call, guarded by
  // fields_by_camelcase_name_once_ (hence mutable).
  mutable FieldsByNameMap fields_by_camelcase_name_;
  std::unique_ptr<FieldsByNameMap> fields_by_camelcase_name_tmp_;
  mutable internal::once_flag fields_by_camelcase_name_once_;
  FieldsByNumberMap fields_by_number_;  // Not including extensions.
  EnumValuesByNumberMap enum_values_by_number_;
  mutable EnumValuesByNumberMap unknown_enum_values_by_number_
      PROTOBUF_GUARDED_BY(unknown_enum_values_mu_);

  // Populated on first request to save space, hence constness games.
  mutable internal::once_flag locations_by_path_once_;
  mutable LocationsByPathMap locations_by_path_;

  // Mutex to protect the unknown-enum-value map due to dynamic
  // EnumValueDescriptor creation on unknown values.
  mutable internal::WrappedMutex unknown_enum_values_mu_;
};
1316 
Tables()1317 DescriptorPool::Tables::Tables() {
1318   well_known_types_.insert({
1319       {"google.protobuf.DoubleValue", Descriptor::WELLKNOWNTYPE_DOUBLEVALUE},
1320       {"google.protobuf.FloatValue", Descriptor::WELLKNOWNTYPE_FLOATVALUE},
1321       {"google.protobuf.Int64Value", Descriptor::WELLKNOWNTYPE_INT64VALUE},
1322       {"google.protobuf.UInt64Value", Descriptor::WELLKNOWNTYPE_UINT64VALUE},
1323       {"google.protobuf.Int32Value", Descriptor::WELLKNOWNTYPE_INT32VALUE},
1324       {"google.protobuf.UInt32Value", Descriptor::WELLKNOWNTYPE_UINT32VALUE},
1325       {"google.protobuf.StringValue", Descriptor::WELLKNOWNTYPE_STRINGVALUE},
1326       {"google.protobuf.BytesValue", Descriptor::WELLKNOWNTYPE_BYTESVALUE},
1327       {"google.protobuf.BoolValue", Descriptor::WELLKNOWNTYPE_BOOLVALUE},
1328       {"google.protobuf.Any", Descriptor::WELLKNOWNTYPE_ANY},
1329       {"google.protobuf.FieldMask", Descriptor::WELLKNOWNTYPE_FIELDMASK},
1330       {"google.protobuf.Duration", Descriptor::WELLKNOWNTYPE_DURATION},
1331       {"google.protobuf.Timestamp", Descriptor::WELLKNOWNTYPE_TIMESTAMP},
1332       {"google.protobuf.Value", Descriptor::WELLKNOWNTYPE_VALUE},
1333       {"google.protobuf.ListValue", Descriptor::WELLKNOWNTYPE_LISTVALUE},
1334       {"google.protobuf.Struct", Descriptor::WELLKNOWNTYPE_STRUCT},
1335   });
1336 }
1337 
// Debug-checks that no checkpoint is still on the stack at destruction time.
DescriptorPool::Tables::~Tables() { GOOGLE_DCHECK(checkpoints_.empty()); }
1339 
// Starts with freshly allocated, empty temporary by-name maps; every other
// member is default-initialized.
FileDescriptorTables::FileDescriptorTables()
    : fields_by_lowercase_name_tmp_(new FieldsByNameMap()),
      fields_by_camelcase_name_tmp_(new FieldsByNameMap()) {}
1343 
// Nothing to do explicitly; the members clean up after themselves.
FileDescriptorTables::~FileDescriptorTables() {}
1345 
GetEmptyInstance()1346 inline const FileDescriptorTables& FileDescriptorTables::GetEmptyInstance() {
1347   static auto file_descriptor_tables =
1348       internal::OnShutdownDelete(new FileDescriptorTables());
1349   return *file_descriptor_tables;
1350 }
1351 
AddCheckpoint()1352 void DescriptorPool::Tables::AddCheckpoint() {
1353   checkpoints_.push_back(CheckPoint(this));
1354 }
1355 
ClearLastCheckpoint()1356 void DescriptorPool::Tables::ClearLastCheckpoint() {
1357   GOOGLE_DCHECK(!checkpoints_.empty());
1358   checkpoints_.pop_back();
1359   if (checkpoints_.empty()) {
1360     // All checkpoints have been cleared: we can now commit all of the pending
1361     // data.
1362     symbols_after_checkpoint_.clear();
1363     files_after_checkpoint_.clear();
1364     extensions_after_checkpoint_.clear();
1365     arena_.ClearRollbackData();
1366   }
1367 }
1368 
RollbackToLastCheckpoint()1369 void DescriptorPool::Tables::RollbackToLastCheckpoint() {
1370   GOOGLE_DCHECK(!checkpoints_.empty());
1371   const CheckPoint& checkpoint = checkpoints_.back();
1372 
1373   for (size_t i = checkpoint.pending_symbols_before_checkpoint;
1374        i < symbols_after_checkpoint_.size(); i++) {
1375     Symbol::QueryKey name;
1376     name.name = symbols_after_checkpoint_[i];
1377     symbols_by_name_.erase(Symbol(&name));
1378   }
1379   for (size_t i = checkpoint.pending_files_before_checkpoint;
1380        i < files_after_checkpoint_.size(); i++) {
1381     files_by_name_.erase(files_after_checkpoint_[i]);
1382   }
1383   for (size_t i = checkpoint.pending_extensions_before_checkpoint;
1384        i < extensions_after_checkpoint_.size(); i++) {
1385     extensions_.erase(extensions_after_checkpoint_[i]);
1386   }
1387 
1388   symbols_after_checkpoint_.resize(
1389       checkpoint.pending_symbols_before_checkpoint);
1390   files_after_checkpoint_.resize(checkpoint.pending_files_before_checkpoint);
1391   extensions_after_checkpoint_.resize(
1392       checkpoint.pending_extensions_before_checkpoint);
1393 
1394   arena_.RollbackTo(checkpoint.arena_before_checkpoint);
1395   checkpoints_.pop_back();
1396 }
1397 
1398 // -------------------------------------------------------------------
1399 
FindSymbol(StringPiece key) const1400 inline Symbol DescriptorPool::Tables::FindSymbol(StringPiece key) const {
1401   Symbol::QueryKey name;
1402   name.name = key;
1403   auto it = symbols_by_name_.find(Symbol(&name));
1404   return it == symbols_by_name_.end() ? kNullSymbol : *it;
1405 }
1406 
FindNestedSymbol(const void * parent,StringPiece name) const1407 inline Symbol FileDescriptorTables::FindNestedSymbol(
1408     const void* parent, StringPiece name) const {
1409   Symbol::QueryKey query;
1410   query.name = name;
1411   query.parent = parent;
1412   auto it = symbols_by_parent_.find(Symbol(&query));
1413   return it == symbols_by_parent_.end() ? kNullSymbol : *it;
1414 }
1415 
FindByNameHelper(const DescriptorPool * pool,StringPiece name)1416 Symbol DescriptorPool::Tables::FindByNameHelper(const DescriptorPool* pool,
1417                                                 StringPiece name) {
1418   if (pool->mutex_ != nullptr) {
1419     // Fast path: the Symbol is already cached.  This is just a hash lookup.
1420     ReaderMutexLock lock(pool->mutex_);
1421     if (known_bad_symbols_.empty() && known_bad_files_.empty()) {
1422       Symbol result = FindSymbol(name);
1423       if (!result.IsNull()) return result;
1424     }
1425   }
1426   MutexLockMaybe lock(pool->mutex_);
1427   if (pool->fallback_database_ != nullptr) {
1428     known_bad_symbols_.clear();
1429     known_bad_files_.clear();
1430   }
1431   Symbol result = FindSymbol(name);
1432 
1433   if (result.IsNull() && pool->underlay_ != nullptr) {
1434     // Symbol not found; check the underlay.
1435     result = pool->underlay_->tables_->FindByNameHelper(pool->underlay_, name);
1436   }
1437 
1438   if (result.IsNull()) {
1439     // Symbol still not found, so check fallback database.
1440     if (pool->TryFindSymbolInFallbackDatabase(name)) {
1441       result = FindSymbol(name);
1442     }
1443   }
1444 
1445   return result;
1446 }
1447 
FindFile(StringPiece key) const1448 inline const FileDescriptor* DescriptorPool::Tables::FindFile(
1449     StringPiece key) const {
1450   return FindPtrOrNull(files_by_name_, key);
1451 }
1452 
FindFieldByNumber(const Descriptor * parent,int number) const1453 inline const FieldDescriptor* FileDescriptorTables::FindFieldByNumber(
1454     const Descriptor* parent, int number) const {
1455   return FindPtrOrNull(fields_by_number_, std::make_pair(parent, number));
1456 }
1457 
FindParentForFieldsByMap(const FieldDescriptor * field) const1458 const void* FileDescriptorTables::FindParentForFieldsByMap(
1459     const FieldDescriptor* field) const {
1460   if (field->is_extension()) {
1461     if (field->extension_scope() == nullptr) {
1462       return field->file();
1463     } else {
1464       return field->extension_scope();
1465     }
1466   } else {
1467     return field->containing_type();
1468   }
1469 }
1470 
// Static trampoline handed to internal::call_once by
// FindFieldByLowercaseName(); forwards to the member function on `tables`.
void FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic(
    const FileDescriptorTables* tables) {
  tables->FieldsByLowercaseNamesLazyInitInternal();
}
1475 
FieldsByLowercaseNamesLazyInitInternal() const1476 void FileDescriptorTables::FieldsByLowercaseNamesLazyInitInternal() const {
1477   for (FieldsByNumberMap::const_iterator it = fields_by_number_.begin();
1478        it != fields_by_number_.end(); it++) {
1479     PointerStringPair lowercase_key(FindParentForFieldsByMap(it->second),
1480                                     it->second->lowercase_name().c_str());
1481     InsertIfNotPresent(&fields_by_lowercase_name_, lowercase_key,
1482                             it->second);
1483   }
1484 }
1485 
FindFieldByLowercaseName(const void * parent,StringPiece lowercase_name) const1486 inline const FieldDescriptor* FileDescriptorTables::FindFieldByLowercaseName(
1487     const void* parent, StringPiece lowercase_name) const {
1488   internal::call_once(
1489       fields_by_lowercase_name_once_,
1490       &FileDescriptorTables::FieldsByLowercaseNamesLazyInitStatic, this);
1491   return FindPtrOrNull(fields_by_lowercase_name_,
1492                             PointerStringPair(parent, lowercase_name));
1493 }
1494 
// Static trampoline handed to internal::call_once by
// FindFieldByCamelcaseName(); forwards to the member function on `tables`.
void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic(
    const FileDescriptorTables* tables) {
  tables->FieldsByCamelcaseNamesLazyInitInternal();
}
1499 
FieldsByCamelcaseNamesLazyInitInternal() const1500 void FileDescriptorTables::FieldsByCamelcaseNamesLazyInitInternal() const {
1501   for (FieldsByNumberMap::const_iterator it = fields_by_number_.begin();
1502        it != fields_by_number_.end(); it++) {
1503     PointerStringPair camelcase_key(FindParentForFieldsByMap(it->second),
1504                                     it->second->camelcase_name().c_str());
1505     InsertIfNotPresent(&fields_by_camelcase_name_, camelcase_key,
1506                             it->second);
1507   }
1508 }
1509 
FindFieldByCamelcaseName(const void * parent,StringPiece camelcase_name) const1510 inline const FieldDescriptor* FileDescriptorTables::FindFieldByCamelcaseName(
1511     const void* parent, StringPiece camelcase_name) const {
1512   internal::call_once(
1513       fields_by_camelcase_name_once_,
1514       FileDescriptorTables::FieldsByCamelcaseNamesLazyInitStatic, this);
1515   return FindPtrOrNull(fields_by_camelcase_name_,
1516                             PointerStringPair(parent, camelcase_name));
1517 }
1518 
FindEnumValueByNumber(const EnumDescriptor * parent,int number) const1519 inline const EnumValueDescriptor* FileDescriptorTables::FindEnumValueByNumber(
1520     const EnumDescriptor* parent, int number) const {
1521   return FindPtrOrNull(enum_values_by_number_,
1522                             std::make_pair(parent, number));
1523 }
1524 
1525 inline const EnumValueDescriptor*
FindEnumValueByNumberCreatingIfUnknown(const EnumDescriptor * parent,int number) const1526 FileDescriptorTables::FindEnumValueByNumberCreatingIfUnknown(
1527     const EnumDescriptor* parent, int number) const {
1528   // First try, with map of compiled-in values.
1529   {
1530     const EnumValueDescriptor* desc = FindPtrOrNull(
1531         enum_values_by_number_, std::make_pair(parent, number));
1532     if (desc != nullptr) {
1533       return desc;
1534     }
1535   }
1536   // Second try, with reader lock held on unknown enum values: common case.
1537   {
1538     ReaderMutexLock l(&unknown_enum_values_mu_);
1539     const EnumValueDescriptor* desc = FindPtrOrNull(
1540         unknown_enum_values_by_number_, std::make_pair(parent, number));
1541     if (desc != nullptr) {
1542       return desc;
1543     }
1544   }
1545   // If not found, try again with writer lock held, and create new descriptor if
1546   // necessary.
1547   {
1548     WriterMutexLock l(&unknown_enum_values_mu_);
1549     const EnumValueDescriptor* desc = FindPtrOrNull(
1550         unknown_enum_values_by_number_, std::make_pair(parent, number));
1551     if (desc != nullptr) {
1552       return desc;
1553     }
1554 
1555     // Create an EnumValueDescriptor dynamically. We don't insert it into the
1556     // EnumDescriptor (it's not a part of the enum as originally defined), but
1557     // we do insert it into the table so that we can return the same pointer
1558     // later.
1559     std::string enum_value_name = StringPrintf("UNKNOWN_ENUM_VALUE_%s_%d",
1560                                                parent->name().c_str(), number);
1561     DescriptorPool::Tables* tables = const_cast<DescriptorPool::Tables*>(
1562         DescriptorPool::generated_pool()->tables_.get());
1563     EnumValueDescriptor* result = tables->Allocate<EnumValueDescriptor>();
1564     result->all_names_ = tables->AllocateStringArray(
1565         enum_value_name,
1566         StrCat(parent->full_name(), ".", enum_value_name));
1567     result->number_ = number;
1568     result->type_ = parent;
1569     result->options_ = &EnumValueOptions::default_instance();
1570     InsertIfNotPresent(&unknown_enum_values_by_number_,
1571                             std::make_pair(parent, number), result);
1572     return result;
1573   }
1574 }
1575 
FindExtension(const Descriptor * extendee,int number) const1576 inline const FieldDescriptor* DescriptorPool::Tables::FindExtension(
1577     const Descriptor* extendee, int number) const {
1578   return FindPtrOrNull(extensions_, std::make_pair(extendee, number));
1579 }
1580 
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const1581 inline void DescriptorPool::Tables::FindAllExtensions(
1582     const Descriptor* extendee,
1583     std::vector<const FieldDescriptor*>* out) const {
1584   ExtensionsGroupedByDescriptorMap::const_iterator it =
1585       extensions_.lower_bound(std::make_pair(extendee, 0));
1586   for (; it != extensions_.end() && it->first.first == extendee; ++it) {
1587     out->push_back(it->second);
1588   }
1589 }
1590 
1591 // -------------------------------------------------------------------
1592 
AddSymbol(const std::string & full_name,Symbol symbol)1593 bool DescriptorPool::Tables::AddSymbol(const std::string& full_name,
1594                                        Symbol symbol) {
1595   GOOGLE_DCHECK_EQ(full_name, symbol.full_name());
1596   if (symbols_by_name_.insert(symbol).second) {
1597     symbols_after_checkpoint_.push_back(full_name.c_str());
1598     return true;
1599   } else {
1600     return false;
1601   }
1602 }
1603 
AddAliasUnderParent(const void * parent,const std::string & name,Symbol symbol)1604 bool FileDescriptorTables::AddAliasUnderParent(const void* parent,
1605                                                const std::string& name,
1606                                                Symbol symbol) {
1607   GOOGLE_DCHECK_EQ(name, symbol.parent_key().second);
1608   GOOGLE_DCHECK_EQ(parent, symbol.parent_key().first);
1609   return symbols_by_parent_.insert(symbol).second;
1610 }
1611 
AddFile(const FileDescriptor * file)1612 bool DescriptorPool::Tables::AddFile(const FileDescriptor* file) {
1613   if (InsertIfNotPresent(&files_by_name_, file->name(), file)) {
1614     files_after_checkpoint_.push_back(file->name().c_str());
1615     return true;
1616   } else {
1617     return false;
1618   }
1619 }
1620 
FinalizeTables()1621 void FileDescriptorTables::FinalizeTables() {
1622   // Clean up the temporary maps used by AddFieldByStylizedNames().
1623   fields_by_lowercase_name_tmp_ = nullptr;
1624   fields_by_camelcase_name_tmp_ = nullptr;
1625 }
1626 
AddFieldByStylizedNames(const FieldDescriptor * field)1627 void FileDescriptorTables::AddFieldByStylizedNames(
1628     const FieldDescriptor* field) {
1629   const void* parent = FindParentForFieldsByMap(field);
1630 
1631   // We want fields_by_{lower,camel}case_name_ to be lazily built, but
1632   // cross-link order determines which entry will be present in the case of a
1633   // conflict. So we use the temporary maps that get destroyed after
1634   // BuildFileImpl() to detect the conflicts, and only store the conflicts in
1635   // the map that will persist. We will then lazily populate the rest of the
1636   // entries from fields_by_number_.
1637 
1638   PointerStringPair lowercase_key(parent, field->lowercase_name().c_str());
1639   if (!InsertIfNotPresent(fields_by_lowercase_name_tmp_.get(),
1640                                lowercase_key, field)) {
1641     InsertIfNotPresent(
1642         &fields_by_lowercase_name_, lowercase_key,
1643         FindPtrOrNull(*fields_by_lowercase_name_tmp_, lowercase_key));
1644   }
1645 
1646   PointerStringPair camelcase_key(parent, field->camelcase_name().c_str());
1647   if (!InsertIfNotPresent(fields_by_camelcase_name_tmp_.get(),
1648                                camelcase_key, field)) {
1649     InsertIfNotPresent(
1650         &fields_by_camelcase_name_, camelcase_key,
1651         FindPtrOrNull(*fields_by_camelcase_name_tmp_, camelcase_key));
1652   }
1653 }
1654 
AddFieldByNumber(const FieldDescriptor * field)1655 bool FileDescriptorTables::AddFieldByNumber(const FieldDescriptor* field) {
1656   DescriptorIntPair key(field->containing_type(), field->number());
1657   return InsertIfNotPresent(&fields_by_number_, key, field);
1658 }
1659 
AddEnumValueByNumber(const EnumValueDescriptor * value)1660 bool FileDescriptorTables::AddEnumValueByNumber(
1661     const EnumValueDescriptor* value) {
1662   EnumIntPair key(value->type(), value->number());
1663   return InsertIfNotPresent(&enum_values_by_number_, key, value);
1664 }
1665 
AddExtension(const FieldDescriptor * field)1666 bool DescriptorPool::Tables::AddExtension(const FieldDescriptor* field) {
1667   DescriptorIntPair key(field->containing_type(), field->number());
1668   if (InsertIfNotPresent(&extensions_, key, field)) {
1669     extensions_after_checkpoint_.push_back(key);
1670     return true;
1671   } else {
1672     return false;
1673   }
1674 }
1675 
1676 // -------------------------------------------------------------------
1677 
1678 template <typename Type>
Allocate()1679 Type* DescriptorPool::Tables::Allocate() {
1680   return reinterpret_cast<Type*>(AllocateBytes(sizeof(Type)));
1681 }
1682 
1683 template <typename Type>
AllocateArray(int count)1684 Type* DescriptorPool::Tables::AllocateArray(int count) {
1685   return reinterpret_cast<Type*>(AllocateBytes(sizeof(Type) * count));
1686 }
1687 
AllocateString(StringPiece value)1688 const std::string* DescriptorPool::Tables::AllocateString(
1689     StringPiece value) {
1690   return arena_.Create<std::string>(value);
1691 }
1692 
1693 template <typename... In>
AllocateStringArray(In &&...values)1694 const std::string* DescriptorPool::Tables::AllocateStringArray(In&&... values) {
1695   auto& array = *arena_.Create<std::array<std::string, sizeof...(In)>>();
1696   array = {{std::string(std::forward<In>(values))...}};
1697   return array.data();
1698 }
1699 
1700 DescriptorPool::Tables::FieldNamesResult
AllocateFieldNames(const std::string & name,const std::string & scope,const std::string * opt_json_name)1701 DescriptorPool::Tables::AllocateFieldNames(const std::string& name,
1702                                            const std::string& scope,
1703                                            const std::string* opt_json_name) {
1704   std::string lowercase_name = name;
1705   LowerString(&lowercase_name);
1706 
1707   std::string camelcase_name = ToCamelCase(name, /* lower_first = */ true);
1708   std::string json_name;
1709   if (opt_json_name != nullptr) {
1710     json_name = *opt_json_name;
1711   } else {
1712     json_name = ToJsonName(name);
1713   }
1714 
1715   const bool lower_eq_name = lowercase_name == name;
1716   const bool camel_eq_name = camelcase_name == name;
1717   const bool json_eq_name = json_name == name;
1718   const bool json_eq_camel = json_name == camelcase_name;
1719 
1720   const int total_count = 2 + (lower_eq_name ? 0 : 1) +
1721                           (camel_eq_name ? 0 : 1) +
1722                           (json_eq_name || json_eq_camel ? 0 : 1);
1723   FieldNamesResult result;
1724   // We use std::array to allow handling of the destruction of the strings.
1725   switch (total_count) {
1726     case 2:
1727       result.array = arena_.Create<std::array<std::string, 2>>()->data();
1728       break;
1729     case 3:
1730       result.array = arena_.Create<std::array<std::string, 3>>()->data();
1731       break;
1732     case 4:
1733       result.array = arena_.Create<std::array<std::string, 4>>()->data();
1734       break;
1735     case 5:
1736       result.array = arena_.Create<std::array<std::string, 5>>()->data();
1737       break;
1738   }
1739 
1740   result.array[0] = name;
1741   if (scope.empty()) {
1742     result.array[1] = name;
1743   } else {
1744     result.array[1] = StrCat(scope, ".", name);
1745   }
1746   int index = 2;
1747   if (lower_eq_name) {
1748     result.lowercase_index = 0;
1749   } else {
1750     result.lowercase_index = index;
1751     result.array[index++] = std::move(lowercase_name);
1752   }
1753 
1754   if (camel_eq_name) {
1755     result.camelcase_index = 0;
1756   } else {
1757     result.camelcase_index = index;
1758     result.array[index++] = std::move(camelcase_name);
1759   }
1760 
1761   if (json_eq_name) {
1762     result.json_index = 0;
1763   } else if (json_eq_camel) {
1764     result.json_index = result.camelcase_index;
1765   } else {
1766     result.json_index = index;
1767     result.array[index] = std::move(json_name);
1768   }
1769 
1770   return result;
1771 }
1772 
AllocateLazyInit()1773 internal::LazyInitData* DescriptorPool::Tables::AllocateLazyInit() {
1774   return arena_.Create<internal::LazyInitData>();
1775 }
1776 
1777 template <typename Type>
AllocateMessage(Type *)1778 Type* DescriptorPool::Tables::AllocateMessage(Type* /* dummy */) {
1779   return arena_.Create<Type>();
1780 }
1781 
AllocateFileTables()1782 FileDescriptorTables* DescriptorPool::Tables::AllocateFileTables() {
1783   return arena_.Create<FileDescriptorTables>();
1784 }
1785 
AllocateBytes(int size)1786 void* DescriptorPool::Tables::AllocateBytes(int size) {
1787   if (size == 0) return nullptr;
1788   return arena_.AllocateMemory(size);
1789 }
1790 
BuildLocationsByPath(std::pair<const FileDescriptorTables *,const SourceCodeInfo * > * p)1791 void FileDescriptorTables::BuildLocationsByPath(
1792     std::pair<const FileDescriptorTables*, const SourceCodeInfo*>* p) {
1793   for (int i = 0, len = p->second->location_size(); i < len; ++i) {
1794     const SourceCodeInfo_Location* loc = &p->second->location().Get(i);
1795     p->first->locations_by_path_[Join(loc->path(), ",")] = loc;
1796   }
1797 }
1798 
GetSourceLocation(const std::vector<int> & path,const SourceCodeInfo * info) const1799 const SourceCodeInfo_Location* FileDescriptorTables::GetSourceLocation(
1800     const std::vector<int>& path, const SourceCodeInfo* info) const {
1801   std::pair<const FileDescriptorTables*, const SourceCodeInfo*> p(
1802       std::make_pair(this, info));
1803   internal::call_once(locations_by_path_once_,
1804                       FileDescriptorTables::BuildLocationsByPath, &p);
1805   return FindPtrOrNull(locations_by_path_, Join(path, ","));
1806 }
1807 
1808 // ===================================================================
1809 // DescriptorPool
1810 
~ErrorCollector()1811 DescriptorPool::ErrorCollector::~ErrorCollector() {}
1812 
DescriptorPool()1813 DescriptorPool::DescriptorPool()
1814     : mutex_(nullptr),
1815       fallback_database_(nullptr),
1816       default_error_collector_(nullptr),
1817       underlay_(nullptr),
1818       tables_(new Tables),
1819       enforce_dependencies_(true),
1820       lazily_build_dependencies_(false),
1821       allow_unknown_(false),
1822       enforce_weak_(false),
1823       disallow_enforce_utf8_(false) {}
1824 
DescriptorPool(DescriptorDatabase * fallback_database,ErrorCollector * error_collector)1825 DescriptorPool::DescriptorPool(DescriptorDatabase* fallback_database,
1826                                ErrorCollector* error_collector)
1827     : mutex_(new internal::WrappedMutex),
1828       fallback_database_(fallback_database),
1829       default_error_collector_(error_collector),
1830       underlay_(nullptr),
1831       tables_(new Tables),
1832       enforce_dependencies_(true),
1833       lazily_build_dependencies_(false),
1834       allow_unknown_(false),
1835       enforce_weak_(false),
1836       disallow_enforce_utf8_(false) {}
1837 
DescriptorPool(const DescriptorPool * underlay)1838 DescriptorPool::DescriptorPool(const DescriptorPool* underlay)
1839     : mutex_(nullptr),
1840       fallback_database_(nullptr),
1841       default_error_collector_(nullptr),
1842       underlay_(underlay),
1843       tables_(new Tables),
1844       enforce_dependencies_(true),
1845       lazily_build_dependencies_(false),
1846       allow_unknown_(false),
1847       enforce_weak_(false),
1848       disallow_enforce_utf8_(false) {}
1849 
~DescriptorPool()1850 DescriptorPool::~DescriptorPool() {
1851   if (mutex_ != nullptr) delete mutex_;
1852 }
1853 
1854 // DescriptorPool::BuildFile() defined later.
1855 // DescriptorPool::BuildFileCollectingErrors() defined later.
1856 
InternalDontEnforceDependencies()1857 void DescriptorPool::InternalDontEnforceDependencies() {
1858   enforce_dependencies_ = false;
1859 }
1860 
AddUnusedImportTrackFile(ConstStringParam file_name,bool is_error)1861 void DescriptorPool::AddUnusedImportTrackFile(ConstStringParam file_name,
1862                                               bool is_error) {
1863   unused_import_track_files_[std::string(file_name)] = is_error;
1864 }
1865 
ClearUnusedImportTrackFiles()1866 void DescriptorPool::ClearUnusedImportTrackFiles() {
1867   unused_import_track_files_.clear();
1868 }
1869 
InternalIsFileLoaded(ConstStringParam filename) const1870 bool DescriptorPool::InternalIsFileLoaded(ConstStringParam filename) const {
1871   MutexLockMaybe lock(mutex_);
1872   return tables_->FindFile(filename) != nullptr;
1873 }
1874 
1875 // generated_pool ====================================================
1876 
1877 namespace {
1878 
1879 
GeneratedDatabase()1880 EncodedDescriptorDatabase* GeneratedDatabase() {
1881   static auto generated_database =
1882       internal::OnShutdownDelete(new EncodedDescriptorDatabase());
1883   return generated_database;
1884 }
1885 
NewGeneratedPool()1886 DescriptorPool* NewGeneratedPool() {
1887   auto generated_pool = new DescriptorPool(GeneratedDatabase());
1888   generated_pool->InternalSetLazilyBuildDependencies();
1889   return generated_pool;
1890 }
1891 
1892 }  // anonymous namespace
1893 
internal_generated_database()1894 DescriptorDatabase* DescriptorPool::internal_generated_database() {
1895   return GeneratedDatabase();
1896 }
1897 
internal_generated_pool()1898 DescriptorPool* DescriptorPool::internal_generated_pool() {
1899   static DescriptorPool* generated_pool =
1900       internal::OnShutdownDelete(NewGeneratedPool());
1901   return generated_pool;
1902 }
1903 
generated_pool()1904 const DescriptorPool* DescriptorPool::generated_pool() {
1905   const DescriptorPool* pool = internal_generated_pool();
1906   // Ensure that descriptor.proto has been registered in the generated pool.
1907   DescriptorProto::descriptor();
1908   return pool;
1909 }
1910 
1911 
InternalAddGeneratedFile(const void * encoded_file_descriptor,int size)1912 void DescriptorPool::InternalAddGeneratedFile(
1913     const void* encoded_file_descriptor, int size) {
1914   // So, this function is called in the process of initializing the
1915   // descriptors for generated proto classes.  Each generated .pb.cc file
1916   // has an internal procedure called AddDescriptors() which is called at
1917   // process startup, and that function calls this one in order to register
1918   // the raw bytes of the FileDescriptorProto representing the file.
1919   //
1920   // We do not actually construct the descriptor objects right away.  We just
1921   // hang on to the bytes until they are actually needed.  We actually construct
1922   // the descriptor the first time one of the following things happens:
1923   // * Someone calls a method like descriptor(), GetDescriptor(), or
1924   //   GetReflection() on the generated types, which requires returning the
1925   //   descriptor or an object based on it.
1926   // * Someone looks up the descriptor in DescriptorPool::generated_pool().
1927   //
1928   // Once one of these happens, the DescriptorPool actually parses the
1929   // FileDescriptorProto and generates a FileDescriptor (and all its children)
1930   // based on it.
1931   //
1932   // Note that FileDescriptorProto is itself a generated protocol message.
1933   // Therefore, when we parse one, we have to be very careful to avoid using
1934   // any descriptor-based operations, since this might cause infinite recursion
1935   // or deadlock.
1936   GOOGLE_CHECK(GeneratedDatabase()->Add(encoded_file_descriptor, size));
1937 }
1938 
1939 
1940 // Find*By* methods ==================================================
1941 
1942 // TODO(kenton):  There's a lot of repeated code here, but I'm not sure if
1943 //   there's any good way to factor it out.  Think about this some time when
1944 //   there's nothing more important to do (read: never).
1945 
FindFileByName(ConstStringParam name) const1946 const FileDescriptor* DescriptorPool::FindFileByName(
1947     ConstStringParam name) const {
1948   MutexLockMaybe lock(mutex_);
1949   if (fallback_database_ != nullptr) {
1950     tables_->known_bad_symbols_.clear();
1951     tables_->known_bad_files_.clear();
1952   }
1953   const FileDescriptor* result = tables_->FindFile(name);
1954   if (result != nullptr) return result;
1955   if (underlay_ != nullptr) {
1956     result = underlay_->FindFileByName(name);
1957     if (result != nullptr) return result;
1958   }
1959   if (TryFindFileInFallbackDatabase(name)) {
1960     result = tables_->FindFile(name);
1961     if (result != nullptr) return result;
1962   }
1963   return nullptr;
1964 }
1965 
FindFileContainingSymbol(ConstStringParam symbol_name) const1966 const FileDescriptor* DescriptorPool::FindFileContainingSymbol(
1967     ConstStringParam symbol_name) const {
1968   MutexLockMaybe lock(mutex_);
1969   if (fallback_database_ != nullptr) {
1970     tables_->known_bad_symbols_.clear();
1971     tables_->known_bad_files_.clear();
1972   }
1973   Symbol result = tables_->FindSymbol(symbol_name);
1974   if (!result.IsNull()) return result.GetFile();
1975   if (underlay_ != nullptr) {
1976     const FileDescriptor* file_result =
1977         underlay_->FindFileContainingSymbol(symbol_name);
1978     if (file_result != nullptr) return file_result;
1979   }
1980   if (TryFindSymbolInFallbackDatabase(symbol_name)) {
1981     result = tables_->FindSymbol(symbol_name);
1982     if (!result.IsNull()) return result.GetFile();
1983   }
1984   return nullptr;
1985 }
1986 
FindMessageTypeByName(ConstStringParam name) const1987 const Descriptor* DescriptorPool::FindMessageTypeByName(
1988     ConstStringParam name) const {
1989   return tables_->FindByNameHelper(this, name).descriptor();
1990 }
1991 
FindFieldByName(ConstStringParam name) const1992 const FieldDescriptor* DescriptorPool::FindFieldByName(
1993     ConstStringParam name) const {
1994   if (const FieldDescriptor* field =
1995           tables_->FindByNameHelper(this, name).field_descriptor()) {
1996     if (!field->is_extension()) {
1997       return field;
1998     }
1999   }
2000   return nullptr;
2001 }
2002 
FindExtensionByName(ConstStringParam name) const2003 const FieldDescriptor* DescriptorPool::FindExtensionByName(
2004     ConstStringParam name) const {
2005   if (const FieldDescriptor* field =
2006           tables_->FindByNameHelper(this, name).field_descriptor()) {
2007     if (field->is_extension()) {
2008       return field;
2009     }
2010   }
2011   return nullptr;
2012 }
2013 
FindOneofByName(ConstStringParam name) const2014 const OneofDescriptor* DescriptorPool::FindOneofByName(
2015     ConstStringParam name) const {
2016   return tables_->FindByNameHelper(this, name).oneof_descriptor();
2017 }
2018 
FindEnumTypeByName(ConstStringParam name) const2019 const EnumDescriptor* DescriptorPool::FindEnumTypeByName(
2020     ConstStringParam name) const {
2021   return tables_->FindByNameHelper(this, name).enum_descriptor();
2022 }
2023 
FindEnumValueByName(ConstStringParam name) const2024 const EnumValueDescriptor* DescriptorPool::FindEnumValueByName(
2025     ConstStringParam name) const {
2026   return tables_->FindByNameHelper(this, name).enum_value_descriptor();
2027 }
2028 
FindServiceByName(ConstStringParam name) const2029 const ServiceDescriptor* DescriptorPool::FindServiceByName(
2030     ConstStringParam name) const {
2031   return tables_->FindByNameHelper(this, name).service_descriptor();
2032 }
2033 
FindMethodByName(ConstStringParam name) const2034 const MethodDescriptor* DescriptorPool::FindMethodByName(
2035     ConstStringParam name) const {
2036   return tables_->FindByNameHelper(this, name).method_descriptor();
2037 }
2038 
FindExtensionByNumber(const Descriptor * extendee,int number) const2039 const FieldDescriptor* DescriptorPool::FindExtensionByNumber(
2040     const Descriptor* extendee, int number) const {
2041   if (extendee->extension_range_count() == 0) return nullptr;
2042   // A faster path to reduce lock contention in finding extensions, assuming
2043   // most extensions will be cache hit.
2044   if (mutex_ != nullptr) {
2045     ReaderMutexLock lock(mutex_);
2046     const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2047     if (result != nullptr) {
2048       return result;
2049     }
2050   }
2051   MutexLockMaybe lock(mutex_);
2052   if (fallback_database_ != nullptr) {
2053     tables_->known_bad_symbols_.clear();
2054     tables_->known_bad_files_.clear();
2055   }
2056   const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2057   if (result != nullptr) {
2058     return result;
2059   }
2060   if (underlay_ != nullptr) {
2061     result = underlay_->FindExtensionByNumber(extendee, number);
2062     if (result != nullptr) return result;
2063   }
2064   if (TryFindExtensionInFallbackDatabase(extendee, number)) {
2065     result = tables_->FindExtension(extendee, number);
2066     if (result != nullptr) {
2067       return result;
2068     }
2069   }
2070   return nullptr;
2071 }
2072 
InternalFindExtensionByNumberNoLock(const Descriptor * extendee,int number) const2073 const FieldDescriptor* DescriptorPool::InternalFindExtensionByNumberNoLock(
2074     const Descriptor* extendee, int number) const {
2075   if (extendee->extension_range_count() == 0) return nullptr;
2076 
2077   const FieldDescriptor* result = tables_->FindExtension(extendee, number);
2078   if (result != nullptr) {
2079     return result;
2080   }
2081 
2082   if (underlay_ != nullptr) {
2083     result = underlay_->InternalFindExtensionByNumberNoLock(extendee, number);
2084     if (result != nullptr) return result;
2085   }
2086 
2087   return nullptr;
2088 }
2089 
FindExtensionByPrintableName(const Descriptor * extendee,ConstStringParam printable_name) const2090 const FieldDescriptor* DescriptorPool::FindExtensionByPrintableName(
2091     const Descriptor* extendee, ConstStringParam printable_name) const {
2092   if (extendee->extension_range_count() == 0) return nullptr;
2093   const FieldDescriptor* result = FindExtensionByName(printable_name);
2094   if (result != nullptr && result->containing_type() == extendee) {
2095     return result;
2096   }
2097   if (extendee->options().message_set_wire_format()) {
2098     // MessageSet extensions may be identified by type name.
2099     const Descriptor* type = FindMessageTypeByName(printable_name);
2100     if (type != nullptr) {
2101       // Look for a matching extension in the foreign type's scope.
2102       const int type_extension_count = type->extension_count();
2103       for (int i = 0; i < type_extension_count; i++) {
2104         const FieldDescriptor* extension = type->extension(i);
2105         if (extension->containing_type() == extendee &&
2106             extension->type() == FieldDescriptor::TYPE_MESSAGE &&
2107             extension->is_optional() && extension->message_type() == type) {
2108           // Found it.
2109           return extension;
2110         }
2111       }
2112     }
2113   }
2114   return nullptr;
2115 }
2116 
FindAllExtensions(const Descriptor * extendee,std::vector<const FieldDescriptor * > * out) const2117 void DescriptorPool::FindAllExtensions(
2118     const Descriptor* extendee,
2119     std::vector<const FieldDescriptor*>* out) const {
2120   MutexLockMaybe lock(mutex_);
2121   if (fallback_database_ != nullptr) {
2122     tables_->known_bad_symbols_.clear();
2123     tables_->known_bad_files_.clear();
2124   }
2125 
2126   // Initialize tables_->extensions_ from the fallback database first
2127   // (but do this only once per descriptor).
2128   if (fallback_database_ != nullptr &&
2129       tables_->extensions_loaded_from_db_.count(extendee) == 0) {
2130     std::vector<int> numbers;
2131     if (fallback_database_->FindAllExtensionNumbers(extendee->full_name(),
2132                                                     &numbers)) {
2133       for (int number : numbers) {
2134         if (tables_->FindExtension(extendee, number) == nullptr) {
2135           TryFindExtensionInFallbackDatabase(extendee, number);
2136         }
2137       }
2138       tables_->extensions_loaded_from_db_.insert(extendee);
2139     }
2140   }
2141 
2142   tables_->FindAllExtensions(extendee, out);
2143   if (underlay_ != nullptr) {
2144     underlay_->FindAllExtensions(extendee, out);
2145   }
2146 }
2147 
2148 
2149 // -------------------------------------------------------------------
2150 
FindFieldByNumber(int key) const2151 const FieldDescriptor* Descriptor::FindFieldByNumber(int key) const {
2152   const FieldDescriptor* result = file()->tables_->FindFieldByNumber(this, key);
2153   if (result == nullptr || result->is_extension()) {
2154     return nullptr;
2155   } else {
2156     return result;
2157   }
2158 }
2159 
FindFieldByLowercaseName(ConstStringParam key) const2160 const FieldDescriptor* Descriptor::FindFieldByLowercaseName(
2161     ConstStringParam key) const {
2162   const FieldDescriptor* result =
2163       file()->tables_->FindFieldByLowercaseName(this, key);
2164   if (result == nullptr || result->is_extension()) {
2165     return nullptr;
2166   } else {
2167     return result;
2168   }
2169 }
2170 
FindFieldByCamelcaseName(ConstStringParam key) const2171 const FieldDescriptor* Descriptor::FindFieldByCamelcaseName(
2172     ConstStringParam key) const {
2173   const FieldDescriptor* result =
2174       file()->tables_->FindFieldByCamelcaseName(this, key);
2175   if (result == nullptr || result->is_extension()) {
2176     return nullptr;
2177   } else {
2178     return result;
2179   }
2180 }
2181 
FindFieldByName(ConstStringParam key) const2182 const FieldDescriptor* Descriptor::FindFieldByName(ConstStringParam key) const {
2183   const FieldDescriptor* field =
2184       file()->tables_->FindNestedSymbol(this, key).field_descriptor();
2185   return field != nullptr && !field->is_extension() ? field : nullptr;
2186 }
2187 
FindOneofByName(ConstStringParam key) const2188 const OneofDescriptor* Descriptor::FindOneofByName(ConstStringParam key) const {
2189   return file()->tables_->FindNestedSymbol(this, key).oneof_descriptor();
2190 }
2191 
FindExtensionByName(ConstStringParam key) const2192 const FieldDescriptor* Descriptor::FindExtensionByName(
2193     ConstStringParam key) const {
2194   const FieldDescriptor* field =
2195       file()->tables_->FindNestedSymbol(this, key).field_descriptor();
2196   return field != nullptr && field->is_extension() ? field : nullptr;
2197 }
2198 
FindExtensionByLowercaseName(ConstStringParam key) const2199 const FieldDescriptor* Descriptor::FindExtensionByLowercaseName(
2200     ConstStringParam key) const {
2201   const FieldDescriptor* result =
2202       file()->tables_->FindFieldByLowercaseName(this, key);
2203   if (result == nullptr || !result->is_extension()) {
2204     return nullptr;
2205   } else {
2206     return result;
2207   }
2208 }
2209 
FindExtensionByCamelcaseName(ConstStringParam key) const2210 const FieldDescriptor* Descriptor::FindExtensionByCamelcaseName(
2211     ConstStringParam key) const {
2212   const FieldDescriptor* result =
2213       file()->tables_->FindFieldByCamelcaseName(this, key);
2214   if (result == nullptr || !result->is_extension()) {
2215     return nullptr;
2216   } else {
2217     return result;
2218   }
2219 }
2220 
FindNestedTypeByName(ConstStringParam key) const2221 const Descriptor* Descriptor::FindNestedTypeByName(ConstStringParam key) const {
2222   return file()->tables_->FindNestedSymbol(this, key).descriptor();
2223 }
2224 
FindEnumTypeByName(ConstStringParam key) const2225 const EnumDescriptor* Descriptor::FindEnumTypeByName(
2226     ConstStringParam key) const {
2227   return file()->tables_->FindNestedSymbol(this, key).enum_descriptor();
2228 }
2229 
FindEnumValueByName(ConstStringParam key) const2230 const EnumValueDescriptor* Descriptor::FindEnumValueByName(
2231     ConstStringParam key) const {
2232   return file()->tables_->FindNestedSymbol(this, key).enum_value_descriptor();
2233 }
2234 
map_key() const2235 const FieldDescriptor* Descriptor::map_key() const {
2236   if (!options().map_entry()) return nullptr;
2237   GOOGLE_DCHECK_EQ(field_count(), 2);
2238   return field(0);
2239 }
2240 
map_value() const2241 const FieldDescriptor* Descriptor::map_value() const {
2242   if (!options().map_entry()) return nullptr;
2243   GOOGLE_DCHECK_EQ(field_count(), 2);
2244   return field(1);
2245 }
2246 
FindValueByName(ConstStringParam key) const2247 const EnumValueDescriptor* EnumDescriptor::FindValueByName(
2248     ConstStringParam key) const {
2249   return file()->tables_->FindNestedSymbol(this, key).enum_value_descriptor();
2250 }
2251 
FindValueByNumber(int key) const2252 const EnumValueDescriptor* EnumDescriptor::FindValueByNumber(int key) const {
2253   return file()->tables_->FindEnumValueByNumber(this, key);
2254 }
2255 
FindValueByNumberCreatingIfUnknown(int key) const2256 const EnumValueDescriptor* EnumDescriptor::FindValueByNumberCreatingIfUnknown(
2257     int key) const {
2258   return file()->tables_->FindEnumValueByNumberCreatingIfUnknown(this, key);
2259 }
2260 
FindMethodByName(ConstStringParam key) const2261 const MethodDescriptor* ServiceDescriptor::FindMethodByName(
2262     ConstStringParam key) const {
2263   return file()->tables_->FindNestedSymbol(this, key).method_descriptor();
2264 }
2265 
FindMessageTypeByName(ConstStringParam key) const2266 const Descriptor* FileDescriptor::FindMessageTypeByName(
2267     ConstStringParam key) const {
2268   return tables_->FindNestedSymbol(this, key).descriptor();
2269 }
2270 
FindEnumTypeByName(ConstStringParam key) const2271 const EnumDescriptor* FileDescriptor::FindEnumTypeByName(
2272     ConstStringParam key) const {
2273   return tables_->FindNestedSymbol(this, key).enum_descriptor();
2274 }
2275 
FindEnumValueByName(ConstStringParam key) const2276 const EnumValueDescriptor* FileDescriptor::FindEnumValueByName(
2277     ConstStringParam key) const {
2278   return tables_->FindNestedSymbol(this, key).enum_value_descriptor();
2279 }
2280 
FindServiceByName(ConstStringParam key) const2281 const ServiceDescriptor* FileDescriptor::FindServiceByName(
2282     ConstStringParam key) const {
2283   return tables_->FindNestedSymbol(this, key).service_descriptor();
2284 }
2285 
FindExtensionByName(ConstStringParam key) const2286 const FieldDescriptor* FileDescriptor::FindExtensionByName(
2287     ConstStringParam key) const {
2288   const FieldDescriptor* field =
2289       tables_->FindNestedSymbol(this, key).field_descriptor();
2290   return field != nullptr && field->is_extension() ? field : nullptr;
2291 }
2292 
FindExtensionByLowercaseName(ConstStringParam key) const2293 const FieldDescriptor* FileDescriptor::FindExtensionByLowercaseName(
2294     ConstStringParam key) const {
2295   const FieldDescriptor* result = tables_->FindFieldByLowercaseName(this, key);
2296   if (result == nullptr || !result->is_extension()) {
2297     return nullptr;
2298   } else {
2299     return result;
2300   }
2301 }
2302 
FindExtensionByCamelcaseName(ConstStringParam key) const2303 const FieldDescriptor* FileDescriptor::FindExtensionByCamelcaseName(
2304     ConstStringParam key) const {
2305   const FieldDescriptor* result = tables_->FindFieldByCamelcaseName(this, key);
2306   if (result == nullptr || !result->is_extension()) {
2307     return nullptr;
2308   } else {
2309     return result;
2310   }
2311 }
2312 
CopyTo(DescriptorProto_ExtensionRange * proto) const2313 void Descriptor::ExtensionRange::CopyTo(
2314     DescriptorProto_ExtensionRange* proto) const {
2315   proto->set_start(this->start);
2316   proto->set_end(this->end);
2317   if (options_ != &ExtensionRangeOptions::default_instance()) {
2318     *proto->mutable_options() = *options_;
2319   }
2320 }
2321 
2322 const Descriptor::ExtensionRange*
FindExtensionRangeContainingNumber(int number) const2323 Descriptor::FindExtensionRangeContainingNumber(int number) const {
2324   // Linear search should be fine because we don't expect a message to have
2325   // more than a couple extension ranges.
2326   for (int i = 0; i < extension_range_count(); i++) {
2327     if (number >= extension_range(i)->start &&
2328         number < extension_range(i)->end) {
2329       return extension_range(i);
2330     }
2331   }
2332   return nullptr;
2333 }
2334 
FindReservedRangeContainingNumber(int number) const2335 const Descriptor::ReservedRange* Descriptor::FindReservedRangeContainingNumber(
2336     int number) const {
2337   // TODO(chrisn): Consider a non-linear search.
2338   for (int i = 0; i < reserved_range_count(); i++) {
2339     if (number >= reserved_range(i)->start && number < reserved_range(i)->end) {
2340       return reserved_range(i);
2341     }
2342   }
2343   return nullptr;
2344 }
2345 
2346 const EnumDescriptor::ReservedRange*
FindReservedRangeContainingNumber(int number) const2347 EnumDescriptor::FindReservedRangeContainingNumber(int number) const {
2348   // TODO(chrisn): Consider a non-linear search.
2349   for (int i = 0; i < reserved_range_count(); i++) {
2350     if (number >= reserved_range(i)->start &&
2351         number <= reserved_range(i)->end) {
2352       return reserved_range(i);
2353     }
2354   }
2355   return nullptr;
2356 }
2357 
2358 // -------------------------------------------------------------------
2359 
TryFindFileInFallbackDatabase(StringPiece name) const2360 bool DescriptorPool::TryFindFileInFallbackDatabase(
2361     StringPiece name) const {
2362   if (fallback_database_ == nullptr) return false;
2363 
2364   auto name_string = std::string(name);
2365   if (tables_->known_bad_files_.count(name_string) > 0) return false;
2366 
2367   FileDescriptorProto file_proto;
2368   if (!fallback_database_->FindFileByName(name_string, &file_proto) ||
2369       BuildFileFromDatabase(file_proto) == nullptr) {
2370     tables_->known_bad_files_.insert(std::move(name_string));
2371     return false;
2372   }
2373   return true;
2374 }
2375 
IsSubSymbolOfBuiltType(StringPiece name) const2376 bool DescriptorPool::IsSubSymbolOfBuiltType(StringPiece name) const {
2377   auto prefix = std::string(name);
2378   for (;;) {
2379     std::string::size_type dot_pos = prefix.find_last_of('.');
2380     if (dot_pos == std::string::npos) {
2381       break;
2382     }
2383     prefix = prefix.substr(0, dot_pos);
2384     Symbol symbol = tables_->FindSymbol(prefix);
2385     // If the symbol type is anything other than PACKAGE, then its complete
2386     // definition is already known.
2387     if (!symbol.IsNull() && symbol.type() != Symbol::PACKAGE) {
2388       return true;
2389     }
2390   }
2391   if (underlay_ != nullptr) {
2392     // Check to see if any prefix of this symbol exists in the underlay.
2393     return underlay_->IsSubSymbolOfBuiltType(name);
2394   }
2395   return false;
2396 }
2397 
TryFindSymbolInFallbackDatabase(StringPiece name) const2398 bool DescriptorPool::TryFindSymbolInFallbackDatabase(
2399     StringPiece name) const {
2400   if (fallback_database_ == nullptr) return false;
2401 
2402   auto name_string = std::string(name);
2403   if (tables_->known_bad_symbols_.count(name_string) > 0) return false;
2404 
2405   FileDescriptorProto file_proto;
2406   if (  // We skip looking in the fallback database if the name is a sub-symbol
2407         // of any descriptor that already exists in the descriptor pool (except
2408         // for package descriptors).  This is valid because all symbols except
2409         // for packages are defined in a single file, so if the symbol exists
2410         // then we should already have its definition.
2411         //
2412         // The other reason to do this is to support "overriding" type
2413         // definitions by merging two databases that define the same type. (Yes,
2414         // people do this.)  The main difficulty with making this work is that
2415         // FindFileContainingSymbol() is allowed to return both false positives
2416         // (e.g., SimpleDescriptorDatabase, UpgradedDescriptorDatabase) and
2417         // false negatives (e.g. ProtoFileParser, SourceTreeDescriptorDatabase).
2418         // When two such databases are merged, looking up a non-existent
2419         // sub-symbol of a type that already exists in the descriptor pool can
2420         // result in an attempt to load multiple definitions of the same type.
2421         // The check below avoids this.
2422       IsSubSymbolOfBuiltType(name)
2423 
2424       // Look up file containing this symbol in fallback database.
2425       || !fallback_database_->FindFileContainingSymbol(name_string, &file_proto)
2426 
2427       // Check if we've already built this file. If so, it apparently doesn't
2428       // contain the symbol we're looking for.  Some DescriptorDatabases
2429       // return false positives.
2430       || tables_->FindFile(file_proto.name()) != nullptr
2431 
2432       // Build the file.
2433       || BuildFileFromDatabase(file_proto) == nullptr) {
2434     tables_->known_bad_symbols_.insert(std::move(name_string));
2435     return false;
2436   }
2437 
2438   return true;
2439 }
2440 
TryFindExtensionInFallbackDatabase(const Descriptor * containing_type,int field_number) const2441 bool DescriptorPool::TryFindExtensionInFallbackDatabase(
2442     const Descriptor* containing_type, int field_number) const {
2443   if (fallback_database_ == nullptr) return false;
2444 
2445   FileDescriptorProto file_proto;
2446   if (!fallback_database_->FindFileContainingExtension(
2447           containing_type->full_name(), field_number, &file_proto)) {
2448     return false;
2449   }
2450 
2451   if (tables_->FindFile(file_proto.name()) != nullptr) {
2452     // We've already loaded this file, and it apparently doesn't contain the
2453     // extension we're looking for.  Some DescriptorDatabases return false
2454     // positives.
2455     return false;
2456   }
2457 
2458   if (BuildFileFromDatabase(file_proto) == nullptr) {
2459     return false;
2460   }
2461 
2462   return true;
2463 }
2464 
2465 // ===================================================================
2466 
is_map_message_type() const2467 bool FieldDescriptor::is_map_message_type() const {
2468   return type_descriptor_.message_type->options().map_entry();
2469 }
2470 
DefaultValueAsString(bool quote_string_type) const2471 std::string FieldDescriptor::DefaultValueAsString(
2472     bool quote_string_type) const {
2473   GOOGLE_CHECK(has_default_value()) << "No default value";
2474   switch (cpp_type()) {
2475     case CPPTYPE_INT32:
2476       return StrCat(default_value_int32_t());
2477       break;
2478     case CPPTYPE_INT64:
2479       return StrCat(default_value_int64_t());
2480       break;
2481     case CPPTYPE_UINT32:
2482       return StrCat(default_value_uint32_t());
2483       break;
2484     case CPPTYPE_UINT64:
2485       return StrCat(default_value_uint64_t());
2486       break;
2487     case CPPTYPE_FLOAT:
2488       return SimpleFtoa(default_value_float());
2489       break;
2490     case CPPTYPE_DOUBLE:
2491       return SimpleDtoa(default_value_double());
2492       break;
2493     case CPPTYPE_BOOL:
2494       return default_value_bool() ? "true" : "false";
2495       break;
2496     case CPPTYPE_STRING:
2497       if (quote_string_type) {
2498         return "\"" + CEscape(default_value_string()) + "\"";
2499       } else {
2500         if (type() == TYPE_BYTES) {
2501           return CEscape(default_value_string());
2502         } else {
2503           return default_value_string();
2504         }
2505       }
2506       break;
2507     case CPPTYPE_ENUM:
2508       return default_value_enum()->name();
2509       break;
2510     case CPPTYPE_MESSAGE:
2511       GOOGLE_LOG(DFATAL) << "Messages can't have default values!";
2512       break;
2513   }
2514   GOOGLE_LOG(FATAL) << "Can't get here: failed to get default value as string";
2515   return "";
2516 }
2517 
2518 // CopyTo methods ====================================================
2519 
CopyTo(FileDescriptorProto * proto) const2520 void FileDescriptor::CopyTo(FileDescriptorProto* proto) const {
2521   proto->set_name(name());
2522   if (!package().empty()) proto->set_package(package());
2523   // TODO(liujisi): Also populate when syntax="proto2".
2524   if (syntax() == SYNTAX_PROTO3) proto->set_syntax(SyntaxName(syntax()));
2525 
2526   for (int i = 0; i < dependency_count(); i++) {
2527     proto->add_dependency(dependency(i)->name());
2528   }
2529 
2530   for (int i = 0; i < public_dependency_count(); i++) {
2531     proto->add_public_dependency(public_dependencies_[i]);
2532   }
2533 
2534   for (int i = 0; i < weak_dependency_count(); i++) {
2535     proto->add_weak_dependency(weak_dependencies_[i]);
2536   }
2537 
2538   for (int i = 0; i < message_type_count(); i++) {
2539     message_type(i)->CopyTo(proto->add_message_type());
2540   }
2541   for (int i = 0; i < enum_type_count(); i++) {
2542     enum_type(i)->CopyTo(proto->add_enum_type());
2543   }
2544   for (int i = 0; i < service_count(); i++) {
2545     service(i)->CopyTo(proto->add_service());
2546   }
2547   for (int i = 0; i < extension_count(); i++) {
2548     extension(i)->CopyTo(proto->add_extension());
2549   }
2550 
2551   if (&options() != &FileOptions::default_instance()) {
2552     proto->mutable_options()->CopyFrom(options());
2553   }
2554 }
2555 
CopyJsonNameTo(FileDescriptorProto * proto) const2556 void FileDescriptor::CopyJsonNameTo(FileDescriptorProto* proto) const {
2557   if (message_type_count() != proto->message_type_size() ||
2558       extension_count() != proto->extension_size()) {
2559     GOOGLE_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2560     return;
2561   }
2562   for (int i = 0; i < message_type_count(); i++) {
2563     message_type(i)->CopyJsonNameTo(proto->mutable_message_type(i));
2564   }
2565   for (int i = 0; i < extension_count(); i++) {
2566     extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
2567   }
2568 }
2569 
CopySourceCodeInfoTo(FileDescriptorProto * proto) const2570 void FileDescriptor::CopySourceCodeInfoTo(FileDescriptorProto* proto) const {
2571   if (source_code_info_ &&
2572       source_code_info_ != &SourceCodeInfo::default_instance()) {
2573     proto->mutable_source_code_info()->CopyFrom(*source_code_info_);
2574   }
2575 }
2576 
CopyTo(DescriptorProto * proto) const2577 void Descriptor::CopyTo(DescriptorProto* proto) const {
2578   proto->set_name(name());
2579 
2580   for (int i = 0; i < field_count(); i++) {
2581     field(i)->CopyTo(proto->add_field());
2582   }
2583   for (int i = 0; i < oneof_decl_count(); i++) {
2584     oneof_decl(i)->CopyTo(proto->add_oneof_decl());
2585   }
2586   for (int i = 0; i < nested_type_count(); i++) {
2587     nested_type(i)->CopyTo(proto->add_nested_type());
2588   }
2589   for (int i = 0; i < enum_type_count(); i++) {
2590     enum_type(i)->CopyTo(proto->add_enum_type());
2591   }
2592   for (int i = 0; i < extension_range_count(); i++) {
2593     extension_range(i)->CopyTo(proto->add_extension_range());
2594   }
2595   for (int i = 0; i < extension_count(); i++) {
2596     extension(i)->CopyTo(proto->add_extension());
2597   }
2598   for (int i = 0; i < reserved_range_count(); i++) {
2599     DescriptorProto::ReservedRange* range = proto->add_reserved_range();
2600     range->set_start(reserved_range(i)->start);
2601     range->set_end(reserved_range(i)->end);
2602   }
2603   for (int i = 0; i < reserved_name_count(); i++) {
2604     proto->add_reserved_name(reserved_name(i));
2605   }
2606 
2607   if (&options() != &MessageOptions::default_instance()) {
2608     proto->mutable_options()->CopyFrom(options());
2609   }
2610 }
2611 
CopyJsonNameTo(DescriptorProto * proto) const2612 void Descriptor::CopyJsonNameTo(DescriptorProto* proto) const {
2613   if (field_count() != proto->field_size() ||
2614       nested_type_count() != proto->nested_type_size() ||
2615       extension_count() != proto->extension_size()) {
2616     GOOGLE_LOG(ERROR) << "Cannot copy json_name to a proto of a different size.";
2617     return;
2618   }
2619   for (int i = 0; i < field_count(); i++) {
2620     field(i)->CopyJsonNameTo(proto->mutable_field(i));
2621   }
2622   for (int i = 0; i < nested_type_count(); i++) {
2623     nested_type(i)->CopyJsonNameTo(proto->mutable_nested_type(i));
2624   }
2625   for (int i = 0; i < extension_count(); i++) {
2626     extension(i)->CopyJsonNameTo(proto->mutable_extension(i));
2627   }
2628 }
2629 
CopyTo(FieldDescriptorProto * proto) const2630 void FieldDescriptor::CopyTo(FieldDescriptorProto* proto) const {
2631   proto->set_name(name());
2632   proto->set_number(number());
2633   if (has_json_name_) {
2634     proto->set_json_name(json_name());
2635   }
2636   if (proto3_optional_) {
2637     proto->set_proto3_optional(true);
2638   }
2639   // Some compilers do not allow static_cast directly between two enum types,
2640   // so we must cast to int first.
2641   proto->set_label(static_cast<FieldDescriptorProto::Label>(
2642       implicit_cast<int>(label())));
2643   proto->set_type(static_cast<FieldDescriptorProto::Type>(
2644       implicit_cast<int>(type())));
2645 
2646   if (is_extension()) {
2647     if (!containing_type()->is_unqualified_placeholder_) {
2648       proto->set_extendee(".");
2649     }
2650     proto->mutable_extendee()->append(containing_type()->full_name());
2651   }
2652 
2653   if (cpp_type() == CPPTYPE_MESSAGE) {
2654     if (message_type()->is_placeholder_) {
2655       // We don't actually know if the type is a message type.  It could be
2656       // an enum.
2657       proto->clear_type();
2658     }
2659 
2660     if (!message_type()->is_unqualified_placeholder_) {
2661       proto->set_type_name(".");
2662     }
2663     proto->mutable_type_name()->append(message_type()->full_name());
2664   } else if (cpp_type() == CPPTYPE_ENUM) {
2665     if (!enum_type()->is_unqualified_placeholder_) {
2666       proto->set_type_name(".");
2667     }
2668     proto->mutable_type_name()->append(enum_type()->full_name());
2669   }
2670 
2671   if (has_default_value()) {
2672     proto->set_default_value(DefaultValueAsString(false));
2673   }
2674 
2675   if (containing_oneof() != nullptr && !is_extension()) {
2676     proto->set_oneof_index(containing_oneof()->index());
2677   }
2678 
2679   if (&options() != &FieldOptions::default_instance()) {
2680     proto->mutable_options()->CopyFrom(options());
2681   }
2682 }
2683 
CopyJsonNameTo(FieldDescriptorProto * proto) const2684 void FieldDescriptor::CopyJsonNameTo(FieldDescriptorProto* proto) const {
2685   proto->set_json_name(json_name());
2686 }
2687 
CopyTo(OneofDescriptorProto * proto) const2688 void OneofDescriptor::CopyTo(OneofDescriptorProto* proto) const {
2689   proto->set_name(name());
2690   if (&options() != &OneofOptions::default_instance()) {
2691     proto->mutable_options()->CopyFrom(options());
2692   }
2693 }
2694 
CopyTo(EnumDescriptorProto * proto) const2695 void EnumDescriptor::CopyTo(EnumDescriptorProto* proto) const {
2696   proto->set_name(name());
2697 
2698   for (int i = 0; i < value_count(); i++) {
2699     value(i)->CopyTo(proto->add_value());
2700   }
2701   for (int i = 0; i < reserved_range_count(); i++) {
2702     EnumDescriptorProto::EnumReservedRange* range = proto->add_reserved_range();
2703     range->set_start(reserved_range(i)->start);
2704     range->set_end(reserved_range(i)->end);
2705   }
2706   for (int i = 0; i < reserved_name_count(); i++) {
2707     proto->add_reserved_name(reserved_name(i));
2708   }
2709 
2710   if (&options() != &EnumOptions::default_instance()) {
2711     proto->mutable_options()->CopyFrom(options());
2712   }
2713 }
2714 
CopyTo(EnumValueDescriptorProto * proto) const2715 void EnumValueDescriptor::CopyTo(EnumValueDescriptorProto* proto) const {
2716   proto->set_name(name());
2717   proto->set_number(number());
2718 
2719   if (&options() != &EnumValueOptions::default_instance()) {
2720     proto->mutable_options()->CopyFrom(options());
2721   }
2722 }
2723 
CopyTo(ServiceDescriptorProto * proto) const2724 void ServiceDescriptor::CopyTo(ServiceDescriptorProto* proto) const {
2725   proto->set_name(name());
2726 
2727   for (int i = 0; i < method_count(); i++) {
2728     method(i)->CopyTo(proto->add_method());
2729   }
2730 
2731   if (&options() != &ServiceOptions::default_instance()) {
2732     proto->mutable_options()->CopyFrom(options());
2733   }
2734 }
2735 
CopyTo(MethodDescriptorProto * proto) const2736 void MethodDescriptor::CopyTo(MethodDescriptorProto* proto) const {
2737   proto->set_name(name());
2738 
2739   if (!input_type()->is_unqualified_placeholder_) {
2740     proto->set_input_type(".");
2741   }
2742   proto->mutable_input_type()->append(input_type()->full_name());
2743 
2744   if (!output_type()->is_unqualified_placeholder_) {
2745     proto->set_output_type(".");
2746   }
2747   proto->mutable_output_type()->append(output_type()->full_name());
2748 
2749   if (&options() != &MethodOptions::default_instance()) {
2750     proto->mutable_options()->CopyFrom(options());
2751   }
2752 
2753   if (client_streaming_) {
2754     proto->set_client_streaming(true);
2755   }
2756   if (server_streaming_) {
2757     proto->set_server_streaming(true);
2758   }
2759 }
2760 
2761 // DebugString methods ===============================================
2762 
2763 namespace {
2764 
RetrieveOptionsAssumingRightPool(int depth,const Message & options,std::vector<std::string> * option_entries)2765 bool RetrieveOptionsAssumingRightPool(
2766     int depth, const Message& options,
2767     std::vector<std::string>* option_entries) {
2768   option_entries->clear();
2769   const Reflection* reflection = options.GetReflection();
2770   std::vector<const FieldDescriptor*> fields;
2771   reflection->ListFields(options, &fields);
2772   for (const FieldDescriptor* field : fields) {
2773     int count = 1;
2774     bool repeated = false;
2775     if (field->is_repeated()) {
2776       count = reflection->FieldSize(options, field);
2777       repeated = true;
2778     }
2779     for (int j = 0; j < count; j++) {
2780       std::string fieldval;
2781       if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2782         std::string tmp;
2783         TextFormat::Printer printer;
2784         printer.SetExpandAny(true);
2785         printer.SetInitialIndentLevel(depth + 1);
2786         printer.PrintFieldValueToString(options, field, repeated ? j : -1,
2787                                         &tmp);
2788         fieldval.append("{\n");
2789         fieldval.append(tmp);
2790         fieldval.append(depth * 2, ' ');
2791         fieldval.append("}");
2792       } else {
2793         TextFormat::PrintFieldValueToString(options, field, repeated ? j : -1,
2794                                             &fieldval);
2795       }
2796       std::string name;
2797       if (field->is_extension()) {
2798         name = "(." + field->full_name() + ")";
2799       } else {
2800         name = field->name();
2801       }
2802       option_entries->push_back(name + " = " + fieldval);
2803     }
2804   }
2805   return !option_entries->empty();
2806 }
2807 
2808 // Used by each of the option formatters.
RetrieveOptions(int depth,const Message & options,const DescriptorPool * pool,std::vector<std::string> * option_entries)2809 bool RetrieveOptions(int depth, const Message& options,
2810                      const DescriptorPool* pool,
2811                      std::vector<std::string>* option_entries) {
2812   // When printing custom options for a descriptor, we must use an options
2813   // message built on top of the same DescriptorPool where the descriptor
2814   // is coming from. This is to ensure we are interpreting custom options
2815   // against the right pool.
2816   if (options.GetDescriptor()->file()->pool() == pool) {
2817     return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
2818   } else {
2819     const Descriptor* option_descriptor =
2820         pool->FindMessageTypeByName(options.GetDescriptor()->full_name());
2821     if (option_descriptor == nullptr) {
2822       // descriptor.proto is not in the pool. This means no custom options are
2823       // used so we are safe to proceed with the compiled options message type.
2824       return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
2825     }
2826     DynamicMessageFactory factory;
2827     std::unique_ptr<Message> dynamic_options(
2828         factory.GetPrototype(option_descriptor)->New());
2829     if (dynamic_options->ParseFromString(options.SerializeAsString())) {
2830       return RetrieveOptionsAssumingRightPool(depth, *dynamic_options,
2831                                               option_entries);
2832     } else {
2833       GOOGLE_LOG(ERROR) << "Found invalid proto option data for: "
2834                  << options.GetDescriptor()->full_name();
2835       return RetrieveOptionsAssumingRightPool(depth, options, option_entries);
2836     }
2837   }
2838 }
2839 
2840 // Formats options that all appear together in brackets. Does not include
2841 // brackets.
FormatBracketedOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)2842 bool FormatBracketedOptions(int depth, const Message& options,
2843                             const DescriptorPool* pool, std::string* output) {
2844   std::vector<std::string> all_options;
2845   if (RetrieveOptions(depth, options, pool, &all_options)) {
2846     output->append(Join(all_options, ", "));
2847   }
2848   return !all_options.empty();
2849 }
2850 
2851 // Formats options one per line
FormatLineOptions(int depth,const Message & options,const DescriptorPool * pool,std::string * output)2852 bool FormatLineOptions(int depth, const Message& options,
2853                        const DescriptorPool* pool, std::string* output) {
2854   std::string prefix(depth * 2, ' ');
2855   std::vector<std::string> all_options;
2856   if (RetrieveOptions(depth, options, pool, &all_options)) {
2857     for (const std::string& option : all_options) {
2858       strings::SubstituteAndAppend(output, "$0option $1;\n", prefix, option);
2859     }
2860   }
2861   return !all_options.empty();
2862 }
2863 
2864 class SourceLocationCommentPrinter {
2865  public:
2866   template <typename DescType>
SourceLocationCommentPrinter(const DescType * desc,const std::string & prefix,const DebugStringOptions & options)2867   SourceLocationCommentPrinter(const DescType* desc, const std::string& prefix,
2868                                const DebugStringOptions& options)
2869       : options_(options), prefix_(prefix) {
2870     // Perform the SourceLocation lookup only if we're including user comments,
2871     // because the lookup is fairly expensive.
2872     have_source_loc_ =
2873         options.include_comments && desc->GetSourceLocation(&source_loc_);
2874   }
SourceLocationCommentPrinter(const FileDescriptor * file,const std::vector<int> & path,const std::string & prefix,const DebugStringOptions & options)2875   SourceLocationCommentPrinter(const FileDescriptor* file,
2876                                const std::vector<int>& path,
2877                                const std::string& prefix,
2878                                const DebugStringOptions& options)
2879       : options_(options), prefix_(prefix) {
2880     // Perform the SourceLocation lookup only if we're including user comments,
2881     // because the lookup is fairly expensive.
2882     have_source_loc_ =
2883         options.include_comments && file->GetSourceLocation(path, &source_loc_);
2884   }
AddPreComment(std::string * output)2885   void AddPreComment(std::string* output) {
2886     if (have_source_loc_) {
2887       // Detached leading comments.
2888       for (const std::string& leading_detached_comment :
2889            source_loc_.leading_detached_comments) {
2890         *output += FormatComment(leading_detached_comment);
2891         *output += "\n";
2892       }
2893       // Attached leading comments.
2894       if (!source_loc_.leading_comments.empty()) {
2895         *output += FormatComment(source_loc_.leading_comments);
2896       }
2897     }
2898   }
AddPostComment(std::string * output)2899   void AddPostComment(std::string* output) {
2900     if (have_source_loc_ && source_loc_.trailing_comments.size() > 0) {
2901       *output += FormatComment(source_loc_.trailing_comments);
2902     }
2903   }
2904 
2905   // Format comment such that each line becomes a full-line C++-style comment in
2906   // the DebugString() output.
FormatComment(const std::string & comment_text)2907   std::string FormatComment(const std::string& comment_text) {
2908     std::string stripped_comment = comment_text;
2909     StripWhitespace(&stripped_comment);
2910     std::vector<std::string> lines = Split(stripped_comment, "\n");
2911     std::string output;
2912     for (const std::string& line : lines) {
2913       strings::SubstituteAndAppend(&output, "$0// $1\n", prefix_, line);
2914     }
2915     return output;
2916   }
2917 
2918  private:
2919 
2920   bool have_source_loc_;
2921   SourceLocation source_loc_;
2922   DebugStringOptions options_;
2923   std::string prefix_;
2924 };
2925 
2926 }  // anonymous namespace
2927 
DebugString() const2928 std::string FileDescriptor::DebugString() const {
2929   DebugStringOptions options;  // default options
2930   return DebugStringWithOptions(options);
2931 }
2932 
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const2933 std::string FileDescriptor::DebugStringWithOptions(
2934     const DebugStringOptions& debug_string_options) const {
2935   std::string contents;
2936   {
2937     std::vector<int> path;
2938     path.push_back(FileDescriptorProto::kSyntaxFieldNumber);
2939     SourceLocationCommentPrinter syntax_comment(this, path, "",
2940                                                 debug_string_options);
2941     syntax_comment.AddPreComment(&contents);
2942     strings::SubstituteAndAppend(&contents, "syntax = \"$0\";\n\n",
2943                               SyntaxName(syntax()));
2944     syntax_comment.AddPostComment(&contents);
2945   }
2946 
2947   SourceLocationCommentPrinter comment_printer(this, "", debug_string_options);
2948   comment_printer.AddPreComment(&contents);
2949 
2950   std::set<int> public_dependencies;
2951   std::set<int> weak_dependencies;
2952   public_dependencies.insert(public_dependencies_,
2953                              public_dependencies_ + public_dependency_count_);
2954   weak_dependencies.insert(weak_dependencies_,
2955                            weak_dependencies_ + weak_dependency_count_);
2956 
2957   for (int i = 0; i < dependency_count(); i++) {
2958     if (public_dependencies.count(i) > 0) {
2959       strings::SubstituteAndAppend(&contents, "import public \"$0\";\n",
2960                                 dependency(i)->name());
2961     } else if (weak_dependencies.count(i) > 0) {
2962       strings::SubstituteAndAppend(&contents, "import weak \"$0\";\n",
2963                                 dependency(i)->name());
2964     } else {
2965       strings::SubstituteAndAppend(&contents, "import \"$0\";\n",
2966                                 dependency(i)->name());
2967     }
2968   }
2969 
2970   if (!package().empty()) {
2971     std::vector<int> path;
2972     path.push_back(FileDescriptorProto::kPackageFieldNumber);
2973     SourceLocationCommentPrinter package_comment(this, path, "",
2974                                                  debug_string_options);
2975     package_comment.AddPreComment(&contents);
2976     strings::SubstituteAndAppend(&contents, "package $0;\n\n", package());
2977     package_comment.AddPostComment(&contents);
2978   }
2979 
2980   if (FormatLineOptions(0, options(), pool(), &contents)) {
2981     contents.append("\n");  // add some space if we had options
2982   }
2983 
2984   for (int i = 0; i < enum_type_count(); i++) {
2985     enum_type(i)->DebugString(0, &contents, debug_string_options);
2986     contents.append("\n");
2987   }
2988 
2989   // Find all the 'group' type extensions; we will not output their nested
2990   // definitions (those will be done with their group field descriptor).
2991   std::set<const Descriptor*> groups;
2992   for (int i = 0; i < extension_count(); i++) {
2993     if (extension(i)->type() == FieldDescriptor::TYPE_GROUP) {
2994       groups.insert(extension(i)->message_type());
2995     }
2996   }
2997 
2998   for (int i = 0; i < message_type_count(); i++) {
2999     if (groups.count(message_type(i)) == 0) {
3000       message_type(i)->DebugString(0, &contents, debug_string_options,
3001                                    /* include_opening_clause */ true);
3002       contents.append("\n");
3003     }
3004   }
3005 
3006   for (int i = 0; i < service_count(); i++) {
3007     service(i)->DebugString(&contents, debug_string_options);
3008     contents.append("\n");
3009   }
3010 
3011   const Descriptor* containing_type = nullptr;
3012   for (int i = 0; i < extension_count(); i++) {
3013     if (extension(i)->containing_type() != containing_type) {
3014       if (i > 0) contents.append("}\n\n");
3015       containing_type = extension(i)->containing_type();
3016       strings::SubstituteAndAppend(&contents, "extend .$0 {\n",
3017                                 containing_type->full_name());
3018     }
3019     extension(i)->DebugString(1, &contents, debug_string_options);
3020   }
3021   if (extension_count() > 0) contents.append("}\n\n");
3022 
3023   comment_printer.AddPostComment(&contents);
3024 
3025   return contents;
3026 }
3027 
DebugString() const3028 std::string Descriptor::DebugString() const {
3029   DebugStringOptions options;  // default options
3030   return DebugStringWithOptions(options);
3031 }
3032 
DebugStringWithOptions(const DebugStringOptions & options) const3033 std::string Descriptor::DebugStringWithOptions(
3034     const DebugStringOptions& options) const {
3035   std::string contents;
3036   DebugString(0, &contents, options, /* include_opening_clause */ true);
3037   return contents;
3038 }
3039 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options,bool include_opening_clause) const3040 void Descriptor::DebugString(int depth, std::string* contents,
3041                              const DebugStringOptions& debug_string_options,
3042                              bool include_opening_clause) const {
3043   if (options().map_entry()) {
3044     // Do not generate debug string for auto-generated map-entry type.
3045     return;
3046   }
3047   std::string prefix(depth * 2, ' ');
3048   ++depth;
3049 
3050   SourceLocationCommentPrinter comment_printer(this, prefix,
3051                                                debug_string_options);
3052   comment_printer.AddPreComment(contents);
3053 
3054   if (include_opening_clause) {
3055     strings::SubstituteAndAppend(contents, "$0message $1", prefix, name());
3056   }
3057   contents->append(" {\n");
3058 
3059   FormatLineOptions(depth, options(), file()->pool(), contents);
3060 
3061   // Find all the 'group' types for fields and extensions; we will not output
3062   // their nested definitions (those will be done with their group field
3063   // descriptor).
3064   std::set<const Descriptor*> groups;
3065   for (int i = 0; i < field_count(); i++) {
3066     if (field(i)->type() == FieldDescriptor::TYPE_GROUP) {
3067       groups.insert(field(i)->message_type());
3068     }
3069   }
3070   for (int i = 0; i < extension_count(); i++) {
3071     if (extension(i)->type() == FieldDescriptor::TYPE_GROUP) {
3072       groups.insert(extension(i)->message_type());
3073     }
3074   }
3075 
3076   for (int i = 0; i < nested_type_count(); i++) {
3077     if (groups.count(nested_type(i)) == 0) {
3078       nested_type(i)->DebugString(depth, contents, debug_string_options,
3079                                   /* include_opening_clause */ true);
3080     }
3081   }
3082   for (int i = 0; i < enum_type_count(); i++) {
3083     enum_type(i)->DebugString(depth, contents, debug_string_options);
3084   }
3085   for (int i = 0; i < field_count(); i++) {
3086     if (field(i)->real_containing_oneof() == nullptr) {
3087       field(i)->DebugString(depth, contents, debug_string_options);
3088     } else if (field(i)->containing_oneof()->field(0) == field(i)) {
3089       // This is the first field in this oneof, so print the whole oneof.
3090       field(i)->containing_oneof()->DebugString(depth, contents,
3091                                                 debug_string_options);
3092     }
3093   }
3094 
3095   for (int i = 0; i < extension_range_count(); i++) {
3096     strings::SubstituteAndAppend(contents, "$0  extensions $1 to $2;\n", prefix,
3097                               extension_range(i)->start,
3098                               extension_range(i)->end - 1);
3099   }
3100 
3101   // Group extensions by what they extend, so they can be printed out together.
3102   const Descriptor* containing_type = nullptr;
3103   for (int i = 0; i < extension_count(); i++) {
3104     if (extension(i)->containing_type() != containing_type) {
3105       if (i > 0) strings::SubstituteAndAppend(contents, "$0  }\n", prefix);
3106       containing_type = extension(i)->containing_type();
3107       strings::SubstituteAndAppend(contents, "$0  extend .$1 {\n", prefix,
3108                                 containing_type->full_name());
3109     }
3110     extension(i)->DebugString(depth + 1, contents, debug_string_options);
3111   }
3112   if (extension_count() > 0)
3113     strings::SubstituteAndAppend(contents, "$0  }\n", prefix);
3114 
3115   if (reserved_range_count() > 0) {
3116     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3117     for (int i = 0; i < reserved_range_count(); i++) {
3118       const Descriptor::ReservedRange* range = reserved_range(i);
3119       if (range->end == range->start + 1) {
3120         strings::SubstituteAndAppend(contents, "$0, ", range->start);
3121       } else if (range->end > FieldDescriptor::kMaxNumber) {
3122         strings::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3123       } else {
3124         strings::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3125                                   range->end - 1);
3126       }
3127     }
3128     contents->replace(contents->size() - 2, 2, ";\n");
3129   }
3130 
3131   if (reserved_name_count() > 0) {
3132     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3133     for (int i = 0; i < reserved_name_count(); i++) {
3134       strings::SubstituteAndAppend(contents, "\"$0\", ",
3135                                 CEscape(reserved_name(i)));
3136     }
3137     contents->replace(contents->size() - 2, 2, ";\n");
3138   }
3139 
3140   strings::SubstituteAndAppend(contents, "$0}\n", prefix);
3141   comment_printer.AddPostComment(contents);
3142 }
3143 
DebugString() const3144 std::string FieldDescriptor::DebugString() const {
3145   DebugStringOptions options;  // default options
3146   return DebugStringWithOptions(options);
3147 }
3148 
DebugStringWithOptions(const DebugStringOptions & debug_string_options) const3149 std::string FieldDescriptor::DebugStringWithOptions(
3150     const DebugStringOptions& debug_string_options) const {
3151   std::string contents;
3152   int depth = 0;
3153   if (is_extension()) {
3154     strings::SubstituteAndAppend(&contents, "extend .$0 {\n",
3155                               containing_type()->full_name());
3156     depth = 1;
3157   }
3158   DebugString(depth, &contents, debug_string_options);
3159   if (is_extension()) {
3160     contents.append("}\n");
3161   }
3162   return contents;
3163 }
3164 
3165 // The field type string used in FieldDescriptor::DebugString()
FieldTypeNameDebugString() const3166 std::string FieldDescriptor::FieldTypeNameDebugString() const {
3167   switch (type()) {
3168     case TYPE_MESSAGE:
3169       return "." + message_type()->full_name();
3170     case TYPE_ENUM:
3171       return "." + enum_type()->full_name();
3172     default:
3173       return kTypeToName[type()];
3174   }
3175 }
3176 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3177 void FieldDescriptor::DebugString(
3178     int depth, std::string* contents,
3179     const DebugStringOptions& debug_string_options) const {
3180   std::string prefix(depth * 2, ' ');
3181   std::string field_type;
3182 
3183   // Special case map fields.
3184   if (is_map()) {
3185     strings::SubstituteAndAppend(
3186         &field_type, "map<$0, $1>",
3187         message_type()->field(0)->FieldTypeNameDebugString(),
3188         message_type()->field(1)->FieldTypeNameDebugString());
3189   } else {
3190     field_type = FieldTypeNameDebugString();
3191   }
3192 
3193   std::string label = StrCat(kLabelToName[this->label()], " ");
3194 
3195   // Label is omitted for maps, oneof, and plain proto3 fields.
3196   if (is_map() || real_containing_oneof() ||
3197       (is_optional() && !has_optional_keyword())) {
3198     label.clear();
3199   }
3200 
3201   SourceLocationCommentPrinter comment_printer(this, prefix,
3202                                                debug_string_options);
3203   comment_printer.AddPreComment(contents);
3204 
3205   strings::SubstituteAndAppend(
3206       contents, "$0$1$2 $3 = $4", prefix, label, field_type,
3207       type() == TYPE_GROUP ? message_type()->name() : name(), number());
3208 
3209   bool bracketed = false;
3210   if (has_default_value()) {
3211     bracketed = true;
3212     strings::SubstituteAndAppend(contents, " [default = $0",
3213                               DefaultValueAsString(true));
3214   }
3215   if (has_json_name_) {
3216     if (!bracketed) {
3217       bracketed = true;
3218       contents->append(" [");
3219     } else {
3220       contents->append(", ");
3221     }
3222     contents->append("json_name = \"");
3223     contents->append(CEscape(json_name()));
3224     contents->append("\"");
3225   }
3226 
3227   std::string formatted_options;
3228   if (FormatBracketedOptions(depth, options(), file()->pool(),
3229                              &formatted_options)) {
3230     contents->append(bracketed ? ", " : " [");
3231     bracketed = true;
3232     contents->append(formatted_options);
3233   }
3234 
3235   if (bracketed) {
3236     contents->append("]");
3237   }
3238 
3239   if (type() == TYPE_GROUP) {
3240     if (debug_string_options.elide_group_body) {
3241       contents->append(" { ... };\n");
3242     } else {
3243       message_type()->DebugString(depth, contents, debug_string_options,
3244                                   /* include_opening_clause */ false);
3245     }
3246   } else {
3247     contents->append(";\n");
3248   }
3249 
3250   comment_printer.AddPostComment(contents);
3251 }
3252 
DebugString() const3253 std::string OneofDescriptor::DebugString() const {
3254   DebugStringOptions options;  // default values
3255   return DebugStringWithOptions(options);
3256 }
3257 
DebugStringWithOptions(const DebugStringOptions & options) const3258 std::string OneofDescriptor::DebugStringWithOptions(
3259     const DebugStringOptions& options) const {
3260   std::string contents;
3261   DebugString(0, &contents, options);
3262   return contents;
3263 }
3264 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3265 void OneofDescriptor::DebugString(
3266     int depth, std::string* contents,
3267     const DebugStringOptions& debug_string_options) const {
3268   std::string prefix(depth * 2, ' ');
3269   ++depth;
3270   SourceLocationCommentPrinter comment_printer(this, prefix,
3271                                                debug_string_options);
3272   comment_printer.AddPreComment(contents);
3273   strings::SubstituteAndAppend(contents, "$0oneof $1 {", prefix, name());
3274 
3275   FormatLineOptions(depth, options(), containing_type()->file()->pool(),
3276                     contents);
3277 
3278   if (debug_string_options.elide_oneof_body) {
3279     contents->append(" ... }\n");
3280   } else {
3281     contents->append("\n");
3282     for (int i = 0; i < field_count(); i++) {
3283       field(i)->DebugString(depth, contents, debug_string_options);
3284     }
3285     strings::SubstituteAndAppend(contents, "$0}\n", prefix);
3286   }
3287   comment_printer.AddPostComment(contents);
3288 }
3289 
DebugString() const3290 std::string EnumDescriptor::DebugString() const {
3291   DebugStringOptions options;  // default values
3292   return DebugStringWithOptions(options);
3293 }
3294 
DebugStringWithOptions(const DebugStringOptions & options) const3295 std::string EnumDescriptor::DebugStringWithOptions(
3296     const DebugStringOptions& options) const {
3297   std::string contents;
3298   DebugString(0, &contents, options);
3299   return contents;
3300 }
3301 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3302 void EnumDescriptor::DebugString(
3303     int depth, std::string* contents,
3304     const DebugStringOptions& debug_string_options) const {
3305   std::string prefix(depth * 2, ' ');
3306   ++depth;
3307 
3308   SourceLocationCommentPrinter comment_printer(this, prefix,
3309                                                debug_string_options);
3310   comment_printer.AddPreComment(contents);
3311 
3312   strings::SubstituteAndAppend(contents, "$0enum $1 {\n", prefix, name());
3313 
3314   FormatLineOptions(depth, options(), file()->pool(), contents);
3315 
3316   for (int i = 0; i < value_count(); i++) {
3317     value(i)->DebugString(depth, contents, debug_string_options);
3318   }
3319 
3320   if (reserved_range_count() > 0) {
3321     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3322     for (int i = 0; i < reserved_range_count(); i++) {
3323       const EnumDescriptor::ReservedRange* range = reserved_range(i);
3324       if (range->end == range->start) {
3325         strings::SubstituteAndAppend(contents, "$0, ", range->start);
3326       } else if (range->end == INT_MAX) {
3327         strings::SubstituteAndAppend(contents, "$0 to max, ", range->start);
3328       } else {
3329         strings::SubstituteAndAppend(contents, "$0 to $1, ", range->start,
3330                                   range->end);
3331       }
3332     }
3333     contents->replace(contents->size() - 2, 2, ";\n");
3334   }
3335 
3336   if (reserved_name_count() > 0) {
3337     strings::SubstituteAndAppend(contents, "$0  reserved ", prefix);
3338     for (int i = 0; i < reserved_name_count(); i++) {
3339       strings::SubstituteAndAppend(contents, "\"$0\", ",
3340                                 CEscape(reserved_name(i)));
3341     }
3342     contents->replace(contents->size() - 2, 2, ";\n");
3343   }
3344 
3345   strings::SubstituteAndAppend(contents, "$0}\n", prefix);
3346 
3347   comment_printer.AddPostComment(contents);
3348 }
3349 
DebugString() const3350 std::string EnumValueDescriptor::DebugString() const {
3351   DebugStringOptions options;  // default values
3352   return DebugStringWithOptions(options);
3353 }
3354 
DebugStringWithOptions(const DebugStringOptions & options) const3355 std::string EnumValueDescriptor::DebugStringWithOptions(
3356     const DebugStringOptions& options) const {
3357   std::string contents;
3358   DebugString(0, &contents, options);
3359   return contents;
3360 }
3361 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3362 void EnumValueDescriptor::DebugString(
3363     int depth, std::string* contents,
3364     const DebugStringOptions& debug_string_options) const {
3365   std::string prefix(depth * 2, ' ');
3366 
3367   SourceLocationCommentPrinter comment_printer(this, prefix,
3368                                                debug_string_options);
3369   comment_printer.AddPreComment(contents);
3370 
3371   strings::SubstituteAndAppend(contents, "$0$1 = $2", prefix, name(), number());
3372 
3373   std::string formatted_options;
3374   if (FormatBracketedOptions(depth, options(), type()->file()->pool(),
3375                              &formatted_options)) {
3376     strings::SubstituteAndAppend(contents, " [$0]", formatted_options);
3377   }
3378   contents->append(";\n");
3379 
3380   comment_printer.AddPostComment(contents);
3381 }
3382 
DebugString() const3383 std::string ServiceDescriptor::DebugString() const {
3384   DebugStringOptions options;  // default values
3385   return DebugStringWithOptions(options);
3386 }
3387 
DebugStringWithOptions(const DebugStringOptions & options) const3388 std::string ServiceDescriptor::DebugStringWithOptions(
3389     const DebugStringOptions& options) const {
3390   std::string contents;
3391   DebugString(&contents, options);
3392   return contents;
3393 }
3394 
DebugString(std::string * contents,const DebugStringOptions & debug_string_options) const3395 void ServiceDescriptor::DebugString(
3396     std::string* contents,
3397     const DebugStringOptions& debug_string_options) const {
3398   SourceLocationCommentPrinter comment_printer(this, /* prefix */ "",
3399                                                debug_string_options);
3400   comment_printer.AddPreComment(contents);
3401 
3402   strings::SubstituteAndAppend(contents, "service $0 {\n", name());
3403 
3404   FormatLineOptions(1, options(), file()->pool(), contents);
3405 
3406   for (int i = 0; i < method_count(); i++) {
3407     method(i)->DebugString(1, contents, debug_string_options);
3408   }
3409 
3410   contents->append("}\n");
3411 
3412   comment_printer.AddPostComment(contents);
3413 }
3414 
DebugString() const3415 std::string MethodDescriptor::DebugString() const {
3416   DebugStringOptions options;  // default values
3417   return DebugStringWithOptions(options);
3418 }
3419 
DebugStringWithOptions(const DebugStringOptions & options) const3420 std::string MethodDescriptor::DebugStringWithOptions(
3421     const DebugStringOptions& options) const {
3422   std::string contents;
3423   DebugString(0, &contents, options);
3424   return contents;
3425 }
3426 
DebugString(int depth,std::string * contents,const DebugStringOptions & debug_string_options) const3427 void MethodDescriptor::DebugString(
3428     int depth, std::string* contents,
3429     const DebugStringOptions& debug_string_options) const {
3430   std::string prefix(depth * 2, ' ');
3431   ++depth;
3432 
3433   SourceLocationCommentPrinter comment_printer(this, prefix,
3434                                                debug_string_options);
3435   comment_printer.AddPreComment(contents);
3436 
3437   strings::SubstituteAndAppend(
3438       contents, "$0rpc $1($4.$2) returns ($5.$3)", prefix, name(),
3439       input_type()->full_name(), output_type()->full_name(),
3440       client_streaming() ? "stream " : "", server_streaming() ? "stream " : "");
3441 
3442   std::string formatted_options;
3443   if (FormatLineOptions(depth, options(), service()->file()->pool(),
3444                         &formatted_options)) {
3445     strings::SubstituteAndAppend(contents, " {\n$0$1}\n", formatted_options,
3446                               prefix);
3447   } else {
3448     contents->append(";\n");
3449   }
3450 
3451   comment_printer.AddPostComment(contents);
3452 }
3453 
3454 
3455 // Location methods ===============================================
3456 
GetSourceLocation(const std::vector<int> & path,SourceLocation * out_location) const3457 bool FileDescriptor::GetSourceLocation(const std::vector<int>& path,
3458                                        SourceLocation* out_location) const {
3459   GOOGLE_CHECK(out_location != nullptr);
3460   if (source_code_info_) {
3461     if (const SourceCodeInfo_Location* loc =
3462             tables_->GetSourceLocation(path, source_code_info_)) {
3463       const RepeatedField<int32_t>& span = loc->span();
3464       if (span.size() == 3 || span.size() == 4) {
3465         out_location->start_line = span.Get(0);
3466         out_location->start_column = span.Get(1);
3467         out_location->end_line = span.Get(span.size() == 3 ? 0 : 2);
3468         out_location->end_column = span.Get(span.size() - 1);
3469 
3470         out_location->leading_comments = loc->leading_comments();
3471         out_location->trailing_comments = loc->trailing_comments();
3472         out_location->leading_detached_comments.assign(
3473             loc->leading_detached_comments().begin(),
3474             loc->leading_detached_comments().end());
3475         return true;
3476       }
3477     }
3478   }
3479   return false;
3480 }
3481 
GetSourceLocation(SourceLocation * out_location) const3482 bool FileDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3483   std::vector<int> path;  // empty path for root FileDescriptor
3484   return GetSourceLocation(path, out_location);
3485 }
3486 
is_packed() const3487 bool FieldDescriptor::is_packed() const {
3488   if (!is_packable()) return false;
3489   if (file_->syntax() == FileDescriptor::SYNTAX_PROTO2) {
3490     return (options_ != nullptr) && options_->packed();
3491   } else {
3492     return options_ == nullptr || !options_->has_packed() || options_->packed();
3493   }
3494 }
3495 
GetSourceLocation(SourceLocation * out_location) const3496 bool Descriptor::GetSourceLocation(SourceLocation* out_location) const {
3497   std::vector<int> path;
3498   GetLocationPath(&path);
3499   return file()->GetSourceLocation(path, out_location);
3500 }
3501 
GetSourceLocation(SourceLocation * out_location) const3502 bool FieldDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3503   std::vector<int> path;
3504   GetLocationPath(&path);
3505   return file()->GetSourceLocation(path, out_location);
3506 }
3507 
GetSourceLocation(SourceLocation * out_location) const3508 bool OneofDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3509   std::vector<int> path;
3510   GetLocationPath(&path);
3511   return containing_type()->file()->GetSourceLocation(path, out_location);
3512 }
3513 
GetSourceLocation(SourceLocation * out_location) const3514 bool EnumDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3515   std::vector<int> path;
3516   GetLocationPath(&path);
3517   return file()->GetSourceLocation(path, out_location);
3518 }
3519 
GetSourceLocation(SourceLocation * out_location) const3520 bool MethodDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3521   std::vector<int> path;
3522   GetLocationPath(&path);
3523   return service()->file()->GetSourceLocation(path, out_location);
3524 }
3525 
GetSourceLocation(SourceLocation * out_location) const3526 bool ServiceDescriptor::GetSourceLocation(SourceLocation* out_location) const {
3527   std::vector<int> path;
3528   GetLocationPath(&path);
3529   return file()->GetSourceLocation(path, out_location);
3530 }
3531 
GetSourceLocation(SourceLocation * out_location) const3532 bool EnumValueDescriptor::GetSourceLocation(
3533     SourceLocation* out_location) const {
3534   std::vector<int> path;
3535   GetLocationPath(&path);
3536   return type()->file()->GetSourceLocation(path, out_location);
3537 }
3538 
GetLocationPath(std::vector<int> * output) const3539 void Descriptor::GetLocationPath(std::vector<int>* output) const {
3540   if (containing_type()) {
3541     containing_type()->GetLocationPath(output);
3542     output->push_back(DescriptorProto::kNestedTypeFieldNumber);
3543     output->push_back(index());
3544   } else {
3545     output->push_back(FileDescriptorProto::kMessageTypeFieldNumber);
3546     output->push_back(index());
3547   }
3548 }
3549 
GetLocationPath(std::vector<int> * output) const3550 void FieldDescriptor::GetLocationPath(std::vector<int>* output) const {
3551   if (is_extension()) {
3552     if (extension_scope() == nullptr) {
3553       output->push_back(FileDescriptorProto::kExtensionFieldNumber);
3554       output->push_back(index());
3555     } else {
3556       extension_scope()->GetLocationPath(output);
3557       output->push_back(DescriptorProto::kExtensionFieldNumber);
3558       output->push_back(index());
3559     }
3560   } else {
3561     containing_type()->GetLocationPath(output);
3562     output->push_back(DescriptorProto::kFieldFieldNumber);
3563     output->push_back(index());
3564   }
3565 }
3566 
GetLocationPath(std::vector<int> * output) const3567 void OneofDescriptor::GetLocationPath(std::vector<int>* output) const {
3568   containing_type()->GetLocationPath(output);
3569   output->push_back(DescriptorProto::kOneofDeclFieldNumber);
3570   output->push_back(index());
3571 }
3572 
GetLocationPath(std::vector<int> * output) const3573 void EnumDescriptor::GetLocationPath(std::vector<int>* output) const {
3574   if (containing_type()) {
3575     containing_type()->GetLocationPath(output);
3576     output->push_back(DescriptorProto::kEnumTypeFieldNumber);
3577     output->push_back(index());
3578   } else {
3579     output->push_back(FileDescriptorProto::kEnumTypeFieldNumber);
3580     output->push_back(index());
3581   }
3582 }
3583 
GetLocationPath(std::vector<int> * output) const3584 void EnumValueDescriptor::GetLocationPath(std::vector<int>* output) const {
3585   type()->GetLocationPath(output);
3586   output->push_back(EnumDescriptorProto::kValueFieldNumber);
3587   output->push_back(index());
3588 }
3589 
GetLocationPath(std::vector<int> * output) const3590 void ServiceDescriptor::GetLocationPath(std::vector<int>* output) const {
3591   output->push_back(FileDescriptorProto::kServiceFieldNumber);
3592   output->push_back(index());
3593 }
3594 
GetLocationPath(std::vector<int> * output) const3595 void MethodDescriptor::GetLocationPath(std::vector<int>* output) const {
3596   service()->GetLocationPath(output);
3597   output->push_back(ServiceDescriptorProto::kMethodFieldNumber);
3598   output->push_back(index());
3599 }
3600 
3601 // ===================================================================
3602 
3603 namespace {
3604 
3605 // Represents an options message to interpret. Extension names in the option
3606 // name are resolved relative to name_scope. element_name and orig_opt are
3607 // used only for error reporting (since the parser records locations against
3608 // pointers in the original options, not the mutable copy). The Message must be
3609 // one of the Options messages in descriptor.proto.
3610 struct OptionsToInterpret {
OptionsToInterpretgoogle::protobuf::__anonc1b606e10611::OptionsToInterpret3611   OptionsToInterpret(const std::string& ns, const std::string& el,
3612                      const std::vector<int>& path, const Message* orig_opt,
3613                      Message* opt)
3614       : name_scope(ns),
3615         element_name(el),
3616         element_path(path),
3617         original_options(orig_opt),
3618         options(opt) {}
3619   std::string name_scope;
3620   std::string element_name;
3621   std::vector<int> element_path;
3622   const Message* original_options;
3623   Message* options;
3624 };
3625 
3626 }  // namespace
3627 
3628 class DescriptorBuilder {
3629  public:
3630   DescriptorBuilder(const DescriptorPool* pool, DescriptorPool::Tables* tables,
3631                     DescriptorPool::ErrorCollector* error_collector);
3632   ~DescriptorBuilder();
3633 
3634   const FileDescriptor* BuildFile(const FileDescriptorProto& proto);
3635 
3636  private:
3637   friend class OptionInterpreter;
3638 
3639   // Non-recursive part of BuildFile functionality.
3640   FileDescriptor* BuildFileImpl(const FileDescriptorProto& proto);
3641 
3642   const DescriptorPool* pool_;
3643   DescriptorPool::Tables* tables_;  // for convenience
3644   DescriptorPool::ErrorCollector* error_collector_;
3645 
3646   // As we build descriptors we store copies of the options messages in
3647   // them. We put pointers to those copies in this vector, as we build, so we
3648   // can later (after cross-linking) interpret those options.
3649   std::vector<OptionsToInterpret> options_to_interpret_;
3650 
3651   bool had_errors_;
3652   std::string filename_;
3653   FileDescriptor* file_;
3654   FileDescriptorTables* file_tables_;
3655   std::set<const FileDescriptor*> dependencies_;
3656 
3657   // unused_dependency_ is used to record the unused imported files.
3658   // Note: public import is not considered.
3659   std::set<const FileDescriptor*> unused_dependency_;
3660 
3661   // If LookupSymbol() finds a symbol that is in a file which is not a declared
3662   // dependency of this file, it will fail, but will set
3663   // possible_undeclared_dependency_ to point at that file.  This is only used
3664   // by AddNotDefinedError() to report a more useful error message.
3665   // possible_undeclared_dependency_name_ is the name of the symbol that was
3666   // actually found in possible_undeclared_dependency_, which may be a parent
3667   // of the symbol actually looked for.
3668   const FileDescriptor* possible_undeclared_dependency_;
3669   std::string possible_undeclared_dependency_name_;
3670 
3671   // If LookupSymbol() could resolve a symbol which is not defined,
3672   // record the resolved name.  This is only used by AddNotDefinedError()
3673   // to report a more useful error message.
3674   std::string undefine_resolved_name_;
3675 
3676   void AddError(const std::string& element_name, const Message& descriptor,
3677                 DescriptorPool::ErrorCollector::ErrorLocation location,
3678                 const std::string& error);
3679   void AddError(const std::string& element_name, const Message& descriptor,
3680                 DescriptorPool::ErrorCollector::ErrorLocation location,
3681                 const char* error);
3682   void AddRecursiveImportError(const FileDescriptorProto& proto, int from_here);
3683   void AddTwiceListedError(const FileDescriptorProto& proto, int index);
3684   void AddImportError(const FileDescriptorProto& proto, int index);
3685 
3686   // Adds an error indicating that undefined_symbol was not defined.  Must
3687   // only be called after LookupSymbol() fails.
3688   void AddNotDefinedError(
3689       const std::string& element_name, const Message& descriptor,
3690       DescriptorPool::ErrorCollector::ErrorLocation location,
3691       const std::string& undefined_symbol);
3692 
3693   void AddWarning(const std::string& element_name, const Message& descriptor,
3694                   DescriptorPool::ErrorCollector::ErrorLocation location,
3695                   const std::string& error);
3696 
3697   // Silly helper which determines if the given file is in the given package.
3698   // I.e., either file->package() == package_name or file->package() is a
3699   // nested package within package_name.
3700   bool IsInPackage(const FileDescriptor* file, const std::string& package_name);
3701 
  // Helper function which finds all public dependencies of the given file, and
  // stores them in the dependencies_ set in the builder.
3704   void RecordPublicDependencies(const FileDescriptor* file);
3705 
3706   // Like tables_->FindSymbol(), but additionally:
3707   // - Search the pool's underlay if not found in tables_.
  // - Ensure that the resulting Symbol is from one of the file's declared
3709   //   dependencies.
3710   Symbol FindSymbol(const std::string& name, bool build_it = true);
3711 
3712   // Like FindSymbol() but does not require that the symbol is in one of the
3713   // file's declared dependencies.
3714   Symbol FindSymbolNotEnforcingDeps(const std::string& name,
3715                                     bool build_it = true);
3716 
3717   // This implements the body of FindSymbolNotEnforcingDeps().
3718   Symbol FindSymbolNotEnforcingDepsHelper(const DescriptorPool* pool,
3719                                           const std::string& name,
3720                                           bool build_it = true);
3721 
  // Like FindSymbol(), but looks up the name relative to some other symbol
  // name.  This first searches siblings of relative_to, then siblings of its
  // parents, etc.  For example, LookupSymbol("foo.bar", "baz.qux.corge")
  // searches the scopes "baz.qux", then "baz", then the outermost scope.
  //
  // For a compound name such as "foo.bar", each scope is first probed for the
  // leading component only (FindSymbol("baz.qux.foo"), then
  // FindSymbol("baz.foo")).  In the innermost scope where that component is
  // found and is an aggregate, the full name is looked up
  // (e.g. FindSymbol("baz.qux.foo.bar")) and that result is returned even if
  // it is null.  Only when no enclosing scope yields such an aggregate is the
  // unqualified FindSymbol("foo.bar") attempted.  A name with a leading '.'
  // is treated as fully qualified and looked up directly.
  //
  // If AllowUnknownDependencies() has been called on the DescriptorPool, this
  // will generate a placeholder type if the name is not found (unless the
  // name itself is malformed).  The placeholder_type parameter indicates what
  // kind of placeholder should be constructed in this case.
  //
  // The resolve_mode parameter determines whether any symbol is returned
  // (LOOKUP_ALL), or only symbols that are types (LOOKUP_TYPES).  Note,
  // however, that LookupSymbol may still return a non-type symbol in
  // LOOKUP_TYPES mode, if it believes that's all it could refer to.  The
  // caller should always check that it receives the type of symbol it was
  // expecting.
  //
  // build_it is forwarded to FindSymbol() for every lookup performed.
  enum ResolveMode { LOOKUP_ALL, LOOKUP_TYPES };
  Symbol LookupSymbol(const std::string& name, const std::string& relative_to,
                      DescriptorPool::PlaceholderType placeholder_type =
                          DescriptorPool::PLACEHOLDER_MESSAGE,
                      ResolveMode resolve_mode = LOOKUP_ALL,
                      bool build_it = true);
3742 
3743   // Like LookupSymbol() but will not return a placeholder even if
3744   // AllowUnknownDependencies() has been used.
3745   Symbol LookupSymbolNoPlaceholder(const std::string& name,
3746                                    const std::string& relative_to,
3747                                    ResolveMode resolve_mode = LOOKUP_ALL,
3748                                    bool build_it = true);
3749 
3750   // Calls tables_->AddSymbol() and records an error if it fails.  Returns
3751   // true if successful or false if failed, though most callers can ignore
3752   // the return value since an error has already been recorded.
3753   bool AddSymbol(const std::string& full_name, const void* parent,
3754                  const std::string& name, const Message& proto, Symbol symbol);
3755 
3756   // Like AddSymbol(), but succeeds if the symbol is already defined as long
3757   // as the existing definition is also a package (because it's OK to define
3758   // the same package in two different files).  Also adds all parents of the
3759   // package to the symbol table (e.g. AddPackage("foo.bar", ...) will add
3760   // "foo.bar" and "foo" to the table).
3761   void AddPackage(const std::string& name, const Message& proto,
3762                   FileDescriptor* file);
3763 
3764   // Checks that the symbol name contains only alphanumeric characters and
3765   // underscores.  Records an error otherwise.
3766   void ValidateSymbolName(const std::string& name, const std::string& full_name,
3767                           const Message& proto);
3768 
3769   // Used by BUILD_ARRAY macro (below) to avoid having to have the type
3770   // specified as a macro parameter.
3771   template <typename Type>
AllocateArray(int size,Type ** output)3772   inline void AllocateArray(int size, Type** output) {
3773     *output = tables_->AllocateArray<Type>(size);
3774   }
3775 
3776   // Allocates a copy of orig_options in tables_ and stores it in the
3777   // descriptor. Remembers its uninterpreted options, to be interpreted
3778   // later. DescriptorT must be one of the Descriptor messages from
3779   // descriptor.proto.
3780   template <class DescriptorT>
3781   void AllocateOptions(const typename DescriptorT::OptionsType& orig_options,
3782                        DescriptorT* descriptor, int options_field_tag,
3783                        const std::string& option_name);
3784   // Specialization for FileOptions.
3785   void AllocateOptions(const FileOptions& orig_options,
3786                        FileDescriptor* descriptor);
3787 
3788   // Implementation for AllocateOptions(). Don't call this directly.
3789   template <class DescriptorT>
3790   void AllocateOptionsImpl(
3791       const std::string& name_scope, const std::string& element_name,
3792       const typename DescriptorT::OptionsType& orig_options,
3793       DescriptorT* descriptor, const std::vector<int>& options_path,
3794       const std::string& option_name);
3795 
3796   // Allocates an array of two strings, the first one is a copy of `proto_name`,
3797   // and the second one is the full name.
3798   // Full proto name is "scope.proto_name" if scope is non-empty and
3799   // "proto_name" otherwise.
3800   const std::string* AllocateNameStrings(const std::string& scope,
3801                                          const std::string& proto_name);
3802 
3803   // These methods all have the same signature for the sake of the BUILD_ARRAY
3804   // macro, below.
3805   void BuildMessage(const DescriptorProto& proto, const Descriptor* parent,
3806                     Descriptor* result);
3807   void BuildFieldOrExtension(const FieldDescriptorProto& proto,
3808                              Descriptor* parent, FieldDescriptor* result,
3809                              bool is_extension);
BuildField(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result)3810   void BuildField(const FieldDescriptorProto& proto, Descriptor* parent,
3811                   FieldDescriptor* result) {
3812     BuildFieldOrExtension(proto, parent, result, false);
3813   }
BuildExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result)3814   void BuildExtension(const FieldDescriptorProto& proto, Descriptor* parent,
3815                       FieldDescriptor* result) {
3816     BuildFieldOrExtension(proto, parent, result, true);
3817   }
3818   void BuildExtensionRange(const DescriptorProto::ExtensionRange& proto,
3819                            const Descriptor* parent,
3820                            Descriptor::ExtensionRange* result);
3821   void BuildReservedRange(const DescriptorProto::ReservedRange& proto,
3822                           const Descriptor* parent,
3823                           Descriptor::ReservedRange* result);
3824   void BuildReservedRange(const EnumDescriptorProto::EnumReservedRange& proto,
3825                           const EnumDescriptor* parent,
3826                           EnumDescriptor::ReservedRange* result);
3827   void BuildOneof(const OneofDescriptorProto& proto, Descriptor* parent,
3828                   OneofDescriptor* result);
3829   void CheckEnumValueUniqueness(const EnumDescriptorProto& proto,
3830                                 const EnumDescriptor* result);
3831   void BuildEnum(const EnumDescriptorProto& proto, const Descriptor* parent,
3832                  EnumDescriptor* result);
3833   void BuildEnumValue(const EnumValueDescriptorProto& proto,
3834                       const EnumDescriptor* parent,
3835                       EnumValueDescriptor* result);
3836   void BuildService(const ServiceDescriptorProto& proto, const void* dummy,
3837                     ServiceDescriptor* result);
3838   void BuildMethod(const MethodDescriptorProto& proto,
3839                    const ServiceDescriptor* parent, MethodDescriptor* result);
3840 
3841   void LogUnusedDependency(const FileDescriptorProto& proto,
3842                            const FileDescriptor* result);
3843 
3844   // Must be run only after building.
3845   //
3846   // NOTE: Options will not be available during cross-linking, as they
3847   // have not yet been interpreted. Defer any handling of options to the
3848   // Validate*Options methods.
3849   void CrossLinkFile(FileDescriptor* file, const FileDescriptorProto& proto);
3850   void CrossLinkMessage(Descriptor* message, const DescriptorProto& proto);
3851   void CrossLinkField(FieldDescriptor* field,
3852                       const FieldDescriptorProto& proto);
3853   void CrossLinkExtensionRange(Descriptor::ExtensionRange* range,
3854                                const DescriptorProto::ExtensionRange& proto);
3855   void CrossLinkEnum(EnumDescriptor* enum_type,
3856                      const EnumDescriptorProto& proto);
3857   void CrossLinkEnumValue(EnumValueDescriptor* enum_value,
3858                           const EnumValueDescriptorProto& proto);
3859   void CrossLinkService(ServiceDescriptor* service,
3860                         const ServiceDescriptorProto& proto);
3861   void CrossLinkMethod(MethodDescriptor* method,
3862                        const MethodDescriptorProto& proto);
3863 
3864   // Must be run only after cross-linking.
3865   void InterpretOptions();
3866 
  // A helper class for interpreting options.  It holds a non-owning pointer
  // to the DescriptorBuilder it reports errors through, plus bookkeeping
  // (interpreted_paths_, repeated_option_counts_) that is later consumed by
  // UpdateSourceCodeInfo().
  class OptionInterpreter {
   public:
    // Creates an interpreter that operates in the context of the pool of the
    // specified builder, which must not be nullptr. We don't take ownership of
    // the builder.
    explicit OptionInterpreter(DescriptorBuilder* builder);

    ~OptionInterpreter();

    // Interprets the uninterpreted options in the specified Options message.
    // On error, calls AddError() on the underlying builder and returns false.
    // Otherwise returns true.
    bool InterpretOptions(OptionsToInterpret* options_to_interpret);

    // Updates the given source code info by re-writing uninterpreted option
    // locations to refer to the corresponding interpreted option.
    void UpdateSourceCodeInfo(SourceCodeInfo* info);

    class AggregateOptionFinder;

   private:
    // Interprets uninterpreted_option_ on the specified message, which
    // must be the mutable copy of the original options message to which
    // uninterpreted_option_ belongs. The given src_path is the source
    // location path to the uninterpreted option, and options_path is the
    // source location path to the options message. The location paths are
    // recorded and then used in UpdateSourceCodeInfo.
    bool InterpretSingleOption(Message* options,
                               const std::vector<int>& src_path,
                               const std::vector<int>& options_path);

    // Adds the uninterpreted_option to the given options message verbatim.
    // Used when AllowUnknownDependencies() is in effect and we can't find
    // the option's definition.
    void AddWithoutInterpreting(const UninterpretedOption& uninterpreted_option,
                                Message* options);

    // A recursive helper function that drills into the intermediate fields
    // in unknown_fields to check if field innermost_field is set on the
    // innermost message. Returns false and sets an error if so.
    bool ExamineIfOptionIsSet(
        std::vector<const FieldDescriptor*>::const_iterator
            intermediate_fields_iter,
        std::vector<const FieldDescriptor*>::const_iterator
            intermediate_fields_end,
        const FieldDescriptor* innermost_field,
        const std::string& debug_msg_name,
        const UnknownFieldSet& unknown_fields);

    // Validates the value for the option field of the currently interpreted
    // option and then sets it on the unknown_field.
    bool SetOptionValue(const FieldDescriptor* option_field,
                        UnknownFieldSet* unknown_fields);

    // Parses an aggregate value for a CPPTYPE_MESSAGE option and
    // saves it into *unknown_fields.
    bool SetAggregateOption(const FieldDescriptor* option_field,
                            UnknownFieldSet* unknown_fields);

    // Convenience functions to set an int field the right way, depending on
    // its wire type (a single int CppType can represent multiple wire types).
    void SetInt32(int number, int32_t value, FieldDescriptor::Type type,
                  UnknownFieldSet* unknown_fields);
    void SetInt64(int number, int64_t value, FieldDescriptor::Type type,
                  UnknownFieldSet* unknown_fields);
    void SetUInt32(int number, uint32_t value, FieldDescriptor::Type type,
                   UnknownFieldSet* unknown_fields);
    void SetUInt64(int number, uint64_t value, FieldDescriptor::Type type,
                   UnknownFieldSet* unknown_fields);

    // A helper function that adds an error at the specified location of the
    // option we're currently interpreting, and returns false.  The error is
    // reported through builder_ against options_to_interpret_->element_name
    // and *uninterpreted_option_, so both must be set when this is called.
    bool AddOptionError(DescriptorPool::ErrorCollector::ErrorLocation location,
                        const std::string& msg) {
      builder_->AddError(options_to_interpret_->element_name,
                         *uninterpreted_option_, location, msg);
      return false;
    }

    // A helper function that adds an error at the location of the option name
    // and returns false.  When PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_ is
    // defined, no error is recorded and true is returned instead.
    bool AddNameError(const std::string& msg) {
#ifdef PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
      return true;
#else   // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
      return AddOptionError(DescriptorPool::ErrorCollector::OPTION_NAME, msg);
#endif  // PROTOBUF_INTERNAL_IGNORE_FIELD_NAME_ERRORS_
    }

    // A helper function that adds an error at the location of the option
    // value and returns false.
    bool AddValueError(const std::string& msg) {
      return AddOptionError(DescriptorPool::ErrorCollector::OPTION_VALUE, msg);
    }

    // We interpret against this builder's pool. Is never nullptr. We don't own
    // this pointer.
    DescriptorBuilder* builder_;

    // The options we're currently interpreting, or nullptr if we're not in a
    // call to InterpretOptions.
    const OptionsToInterpret* options_to_interpret_;

    // The option we're currently interpreting within options_to_interpret_, or
    // nullptr if we're not in a call to InterpretOptions(). This points to a
    // submessage of the original option, not the mutable copy. Therefore we
    // can use it to find locations recorded by the parser.
    const UninterpretedOption* uninterpreted_option_;

    // This maps the element path of uninterpreted options to the element path
    // of the resulting interpreted option. This is used to modify a file's
    // source code info to account for option interpretation.
    std::map<std::vector<int>, std::vector<int>> interpreted_paths_;

    // This maps the path to a repeated option field to the known number of
    // elements the field contains. This is used to compute the index portion
    // of the element path when interpreting a single option.
    std::map<std::vector<int>, int> repeated_option_counts_;

    // Factory used to create the dynamic messages we need to parse
    // any aggregate option values we encounter.
    DynamicMessageFactory dynamic_factory_;

    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OptionInterpreter);
  };
3993 
3994   // Work-around for broken compilers:  According to the C++ standard,
3995   // OptionInterpreter should have access to the private members of any class
3996   // which has declared DescriptorBuilder as a friend.  Unfortunately some old
3997   // versions of GCC and other compilers do not implement this correctly.  So,
3998   // we have to have these intermediate methods to provide access.  We also
3999   // redundantly declare OptionInterpreter a friend just to make things extra
4000   // clear for these bad compilers.
4001   friend class OptionInterpreter;
4002   friend class OptionInterpreter::AggregateOptionFinder;
4003 
get_allow_unknown(const DescriptorPool * pool)4004   static inline bool get_allow_unknown(const DescriptorPool* pool) {
4005     return pool->allow_unknown_;
4006   }
get_enforce_weak(const DescriptorPool * pool)4007   static inline bool get_enforce_weak(const DescriptorPool* pool) {
4008     return pool->enforce_weak_;
4009   }
get_is_placeholder(const Descriptor * descriptor)4010   static inline bool get_is_placeholder(const Descriptor* descriptor) {
4011     return descriptor != nullptr && descriptor->is_placeholder_;
4012   }
assert_mutex_held(const DescriptorPool * pool)4013   static inline void assert_mutex_held(const DescriptorPool* pool) {
4014     if (pool->mutex_ != nullptr) {
4015       pool->mutex_->AssertHeld();
4016     }
4017   }
4018 
4019   // Must be run only after options have been interpreted.
4020   //
4021   // NOTE: Validation code must only reference the options in the mutable
4022   // descriptors, which are the ones that have been interpreted. The const
4023   // proto references are passed in only so they can be provided to calls to
4024   // AddError(). Do not look at their options, which have not been interpreted.
4025   void ValidateFileOptions(FileDescriptor* file,
4026                            const FileDescriptorProto& proto);
4027   void ValidateMessageOptions(Descriptor* message,
4028                               const DescriptorProto& proto);
4029   void ValidateFieldOptions(FieldDescriptor* field,
4030                             const FieldDescriptorProto& proto);
4031   void ValidateEnumOptions(EnumDescriptor* enm,
4032                            const EnumDescriptorProto& proto);
4033   void ValidateEnumValueOptions(EnumValueDescriptor* enum_value,
4034                                 const EnumValueDescriptorProto& proto);
4035   void ValidateExtensionRangeOptions(
4036       const std::string& full_name, Descriptor::ExtensionRange* extension_range,
4037       const DescriptorProto_ExtensionRange& proto);
4038   void ValidateServiceOptions(ServiceDescriptor* service,
4039                               const ServiceDescriptorProto& proto);
4040   void ValidateMethodOptions(MethodDescriptor* method,
4041                              const MethodDescriptorProto& proto);
4042   void ValidateProto3(FileDescriptor* file, const FileDescriptorProto& proto);
4043   void ValidateProto3Message(Descriptor* message, const DescriptorProto& proto);
4044   void ValidateProto3Field(FieldDescriptor* field,
4045                            const FieldDescriptorProto& proto);
4046   void ValidateProto3Enum(EnumDescriptor* enm,
4047                           const EnumDescriptorProto& proto);
4048 
4049   // Returns true if the map entry message is compatible with the
4050   // auto-generated entry message from map fields syntax.
4051   bool ValidateMapEntry(FieldDescriptor* field,
4052                         const FieldDescriptorProto& proto);
4053 
4054   // Recursively detects naming conflicts with map entry types for a
4055   // better error message.
4056   void DetectMapConflicts(const Descriptor* message,
4057                           const DescriptorProto& proto);
4058 
4059   void ValidateJSType(FieldDescriptor* field,
4060                       const FieldDescriptorProto& proto);
4061 };
4062 
BuildFile(const FileDescriptorProto & proto)4063 const FileDescriptor* DescriptorPool::BuildFile(
4064     const FileDescriptorProto& proto) {
4065   GOOGLE_CHECK(fallback_database_ == nullptr)
4066       << "Cannot call BuildFile on a DescriptorPool that uses a "
4067          "DescriptorDatabase.  You must instead find a way to get your file "
4068          "into the underlying database.";
4069   GOOGLE_CHECK(mutex_ == nullptr);  // Implied by the above GOOGLE_CHECK.
4070   tables_->known_bad_symbols_.clear();
4071   tables_->known_bad_files_.clear();
4072   return DescriptorBuilder(this, tables_.get(), nullptr).BuildFile(proto);
4073 }
4074 
BuildFileCollectingErrors(const FileDescriptorProto & proto,ErrorCollector * error_collector)4075 const FileDescriptor* DescriptorPool::BuildFileCollectingErrors(
4076     const FileDescriptorProto& proto, ErrorCollector* error_collector) {
4077   GOOGLE_CHECK(fallback_database_ == nullptr)
4078       << "Cannot call BuildFile on a DescriptorPool that uses a "
4079          "DescriptorDatabase.  You must instead find a way to get your file "
4080          "into the underlying database.";
4081   GOOGLE_CHECK(mutex_ == nullptr);  // Implied by the above GOOGLE_CHECK.
4082   tables_->known_bad_symbols_.clear();
4083   tables_->known_bad_files_.clear();
4084   return DescriptorBuilder(this, tables_.get(), error_collector)
4085       .BuildFile(proto);
4086 }
4087 
BuildFileFromDatabase(const FileDescriptorProto & proto) const4088 const FileDescriptor* DescriptorPool::BuildFileFromDatabase(
4089     const FileDescriptorProto& proto) const {
4090   mutex_->AssertHeld();
4091   if (tables_->known_bad_files_.count(proto.name()) > 0) {
4092     return nullptr;
4093   }
4094   const FileDescriptor* result =
4095       DescriptorBuilder(this, tables_.get(), default_error_collector_)
4096           .BuildFile(proto);
4097   if (result == nullptr) {
4098     tables_->known_bad_files_.insert(proto.name());
4099   }
4100   return result;
4101 }
4102 
DescriptorBuilder(const DescriptorPool * pool,DescriptorPool::Tables * tables,DescriptorPool::ErrorCollector * error_collector)4103 DescriptorBuilder::DescriptorBuilder(
4104     const DescriptorPool* pool, DescriptorPool::Tables* tables,
4105     DescriptorPool::ErrorCollector* error_collector)
4106     : pool_(pool),
4107       tables_(tables),
4108       error_collector_(error_collector),
4109       had_errors_(false),
4110       possible_undeclared_dependency_(nullptr),
4111       undefine_resolved_name_("") {}
4112 
~DescriptorBuilder()4113 DescriptorBuilder::~DescriptorBuilder() {}
4114 
AddError(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & error)4115 void DescriptorBuilder::AddError(
4116     const std::string& element_name, const Message& descriptor,
4117     DescriptorPool::ErrorCollector::ErrorLocation location,
4118     const std::string& error) {
4119   if (error_collector_ == nullptr) {
4120     if (!had_errors_) {
4121       GOOGLE_LOG(ERROR) << "Invalid proto descriptor for file \"" << filename_
4122                  << "\":";
4123     }
4124     GOOGLE_LOG(ERROR) << "  " << element_name << ": " << error;
4125   } else {
4126     error_collector_->AddError(filename_, element_name, &descriptor, location,
4127                                error);
4128   }
4129   had_errors_ = true;
4130 }
4131 
AddError(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const char * error)4132 void DescriptorBuilder::AddError(
4133     const std::string& element_name, const Message& descriptor,
4134     DescriptorPool::ErrorCollector::ErrorLocation location, const char* error) {
4135   AddError(element_name, descriptor, location, std::string(error));
4136 }
4137 
AddNotDefinedError(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & undefined_symbol)4138 void DescriptorBuilder::AddNotDefinedError(
4139     const std::string& element_name, const Message& descriptor,
4140     DescriptorPool::ErrorCollector::ErrorLocation location,
4141     const std::string& undefined_symbol) {
4142   if (possible_undeclared_dependency_ == nullptr &&
4143       undefine_resolved_name_.empty()) {
4144     AddError(element_name, descriptor, location,
4145              "\"" + undefined_symbol + "\" is not defined.");
4146   } else {
4147     if (possible_undeclared_dependency_ != nullptr) {
4148       AddError(element_name, descriptor, location,
4149                "\"" + possible_undeclared_dependency_name_ +
4150                    "\" seems to be defined in \"" +
4151                    possible_undeclared_dependency_->name() +
4152                    "\", which is not "
4153                    "imported by \"" +
4154                    filename_ +
4155                    "\".  To use it here, please "
4156                    "add the necessary import.");
4157     }
4158     if (!undefine_resolved_name_.empty()) {
4159       AddError(element_name, descriptor, location,
4160                "\"" + undefined_symbol + "\" is resolved to \"" +
4161                    undefine_resolved_name_ +
4162                    "\", which is not defined. "
4163                    "The innermost scope is searched first in name resolution. "
4164                    "Consider using a leading '.'(i.e., \"." +
4165                    undefined_symbol + "\") to start from the outermost scope.");
4166     }
4167   }
4168 }
4169 
AddWarning(const std::string & element_name,const Message & descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,const std::string & error)4170 void DescriptorBuilder::AddWarning(
4171     const std::string& element_name, const Message& descriptor,
4172     DescriptorPool::ErrorCollector::ErrorLocation location,
4173     const std::string& error) {
4174   if (error_collector_ == nullptr) {
4175     GOOGLE_LOG(WARNING) << filename_ << " " << element_name << ": " << error;
4176   } else {
4177     error_collector_->AddWarning(filename_, element_name, &descriptor, location,
4178                                  error);
4179   }
4180 }
4181 
IsInPackage(const FileDescriptor * file,const std::string & package_name)4182 bool DescriptorBuilder::IsInPackage(const FileDescriptor* file,
4183                                     const std::string& package_name) {
4184   return HasPrefixString(file->package(), package_name) &&
4185          (file->package().size() == package_name.size() ||
4186           file->package()[package_name.size()] == '.');
4187 }
4188 
RecordPublicDependencies(const FileDescriptor * file)4189 void DescriptorBuilder::RecordPublicDependencies(const FileDescriptor* file) {
4190   if (file == nullptr || !dependencies_.insert(file).second) return;
4191   for (int i = 0; file != nullptr && i < file->public_dependency_count(); i++) {
4192     RecordPublicDependencies(file->public_dependency(i));
4193   }
4194 }
4195 
FindSymbolNotEnforcingDepsHelper(const DescriptorPool * pool,const std::string & name,bool build_it)4196 Symbol DescriptorBuilder::FindSymbolNotEnforcingDepsHelper(
4197     const DescriptorPool* pool, const std::string& name, bool build_it) {
4198   // If we are looking at an underlay, we must lock its mutex_, since we are
4199   // accessing the underlay's tables_ directly.
4200   MutexLockMaybe lock((pool == pool_) ? nullptr : pool->mutex_);
4201 
4202   Symbol result = pool->tables_->FindSymbol(name);
4203   if (result.IsNull() && pool->underlay_ != nullptr) {
4204     // Symbol not found; check the underlay.
4205     result = FindSymbolNotEnforcingDepsHelper(pool->underlay_, name);
4206   }
4207 
4208   if (result.IsNull()) {
4209     // With lazily_build_dependencies_, a symbol lookup at cross link time is
4210     // not guaranteed to be successful. In most cases, build_it will be false,
4211     // which intentionally prevents us from building an import until it's
4212     // actually needed. In some cases, like registering an extension, we want
4213     // to build the file containing the symbol, and build_it will be set.
4214     // Also, build_it will be true when !lazily_build_dependencies_, to provide
4215     // better error reporting of missing dependencies.
4216     if (build_it && pool->TryFindSymbolInFallbackDatabase(name)) {
4217       result = pool->tables_->FindSymbol(name);
4218     }
4219   }
4220 
4221   return result;
4222 }
4223 
FindSymbolNotEnforcingDeps(const std::string & name,bool build_it)4224 Symbol DescriptorBuilder::FindSymbolNotEnforcingDeps(const std::string& name,
4225                                                      bool build_it) {
4226   Symbol result = FindSymbolNotEnforcingDepsHelper(pool_, name, build_it);
4227   // Only find symbols which were defined in this file or one of its
4228   // dependencies.
4229   const FileDescriptor* file = result.GetFile();
4230   if (file == file_ || dependencies_.count(file) > 0) {
4231     unused_dependency_.erase(file);
4232   }
4233   return result;
4234 }
4235 
FindSymbol(const std::string & name,bool build_it)4236 Symbol DescriptorBuilder::FindSymbol(const std::string& name, bool build_it) {
4237   Symbol result = FindSymbolNotEnforcingDeps(name, build_it);
4238 
4239   if (result.IsNull()) return result;
4240 
4241   if (!pool_->enforce_dependencies_) {
4242     // Hack for CompilerUpgrader, and also used for lazily_build_dependencies_
4243     return result;
4244   }
4245 
4246   // Only find symbols which were defined in this file or one of its
4247   // dependencies.
4248   const FileDescriptor* file = result.GetFile();
4249   if (file == file_ || dependencies_.count(file) > 0) {
4250     return result;
4251   }
4252 
4253   if (result.type() == Symbol::PACKAGE) {
4254     // Arg, this is overcomplicated.  The symbol is a package name.  It could
4255     // be that the package was defined in multiple files.  result.GetFile()
4256     // returns the first file we saw that used this package.  We've determined
4257     // that that file is not a direct dependency of the file we are currently
4258     // building, but it could be that some other file which *is* a direct
4259     // dependency also defines the same package.  We can't really rule out this
4260     // symbol unless none of the dependencies define it.
4261     if (IsInPackage(file_, name)) return result;
4262     for (std::set<const FileDescriptor*>::const_iterator it =
4263              dependencies_.begin();
4264          it != dependencies_.end(); ++it) {
4265       // Note:  A dependency may be nullptr if it was not found or had errors.
4266       if (*it != nullptr && IsInPackage(*it, name)) return result;
4267     }
4268   }
4269 
4270   possible_undeclared_dependency_ = file;
4271   possible_undeclared_dependency_name_ = name;
4272   return kNullSymbol;
4273 }
4274 
LookupSymbolNoPlaceholder(const std::string & name,const std::string & relative_to,ResolveMode resolve_mode,bool build_it)4275 Symbol DescriptorBuilder::LookupSymbolNoPlaceholder(
4276     const std::string& name, const std::string& relative_to,
4277     ResolveMode resolve_mode, bool build_it) {
4278   possible_undeclared_dependency_ = nullptr;
4279   undefine_resolved_name_.clear();
4280 
4281   if (!name.empty() && name[0] == '.') {
4282     // Fully-qualified name.
4283     return FindSymbol(name.substr(1), build_it);
4284   }
4285 
4286   // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
4287   // defined in multiple parent scopes, we only want to find "Bar.baz" in the
4288   // innermost one.  E.g., the following should produce an error:
4289   //   message Bar { message Baz {} }
4290   //   message Foo {
4291   //     message Bar {
4292   //     }
4293   //     optional Bar.Baz baz = 1;
4294   //   }
4295   // So, we look for just "Foo" first, then look for "Bar.baz" within it if
4296   // found.
4297   std::string::size_type name_dot_pos = name.find_first_of('.');
4298   std::string first_part_of_name;
4299   if (name_dot_pos == std::string::npos) {
4300     first_part_of_name = name;
4301   } else {
4302     first_part_of_name = name.substr(0, name_dot_pos);
4303   }
4304 
4305   std::string scope_to_try(relative_to);
4306 
4307   while (true) {
4308     // Chop off the last component of the scope.
4309     std::string::size_type dot_pos = scope_to_try.find_last_of('.');
4310     if (dot_pos == std::string::npos) {
4311       return FindSymbol(name, build_it);
4312     } else {
4313       scope_to_try.erase(dot_pos);
4314     }
4315 
4316     // Append ".first_part_of_name" and try to find.
4317     std::string::size_type old_size = scope_to_try.size();
4318     scope_to_try.append(1, '.');
4319     scope_to_try.append(first_part_of_name);
4320     Symbol result = FindSymbol(scope_to_try, build_it);
4321     if (!result.IsNull()) {
4322       if (first_part_of_name.size() < name.size()) {
4323         // name is a compound symbol, of which we only found the first part.
4324         // Now try to look up the rest of it.
4325         if (result.IsAggregate()) {
4326           scope_to_try.append(name, first_part_of_name.size(),
4327                               name.size() - first_part_of_name.size());
4328           result = FindSymbol(scope_to_try, build_it);
4329           if (result.IsNull()) {
4330             undefine_resolved_name_ = scope_to_try;
4331           }
4332           return result;
4333         } else {
4334           // We found a symbol but it's not an aggregate.  Continue the loop.
4335         }
4336       } else {
4337         if (resolve_mode == LOOKUP_TYPES && !result.IsType()) {
4338           // We found a symbol but it's not a type.  Continue the loop.
4339         } else {
4340           return result;
4341         }
4342       }
4343     }
4344 
4345     // Not found.  Remove the name so we can try again.
4346     scope_to_try.erase(old_size);
4347   }
4348 }
4349 
LookupSymbol(const std::string & name,const std::string & relative_to,DescriptorPool::PlaceholderType placeholder_type,ResolveMode resolve_mode,bool build_it)4350 Symbol DescriptorBuilder::LookupSymbol(
4351     const std::string& name, const std::string& relative_to,
4352     DescriptorPool::PlaceholderType placeholder_type, ResolveMode resolve_mode,
4353     bool build_it) {
4354   Symbol result =
4355       LookupSymbolNoPlaceholder(name, relative_to, resolve_mode, build_it);
4356   if (result.IsNull() && pool_->allow_unknown_) {
4357     // Not found, but AllowUnknownDependencies() is enabled.  Return a
4358     // placeholder instead.
4359     result = pool_->NewPlaceholderWithMutexHeld(name, placeholder_type);
4360   }
4361   return result;
4362 }
4363 
ValidateQualifiedName(StringPiece name)4364 static bool ValidateQualifiedName(StringPiece name) {
4365   bool last_was_period = false;
4366 
4367   for (char character : name) {
4368     // I don't trust isalnum() due to locales.  :(
4369     if (('a' <= character && character <= 'z') ||
4370         ('A' <= character && character <= 'Z') ||
4371         ('0' <= character && character <= '9') || (character == '_')) {
4372       last_was_period = false;
4373     } else if (character == '.') {
4374       if (last_was_period) return false;
4375       last_was_period = true;
4376     } else {
4377       return false;
4378     }
4379   }
4380 
4381   return !name.empty() && !last_was_period;
4382 }
4383 
NewPlaceholder(StringPiece name,PlaceholderType placeholder_type) const4384 Symbol DescriptorPool::NewPlaceholder(StringPiece name,
4385                                       PlaceholderType placeholder_type) const {
4386   MutexLockMaybe lock(mutex_);
4387   return NewPlaceholderWithMutexHeld(name, placeholder_type);
4388 }
4389 
NewPlaceholderWithMutexHeld(StringPiece name,PlaceholderType placeholder_type) const4390 Symbol DescriptorPool::NewPlaceholderWithMutexHeld(
4391     StringPiece name, PlaceholderType placeholder_type) const {
4392   if (mutex_) {
4393     mutex_->AssertHeld();
4394   }
4395   // Compute names.
4396   StringPiece placeholder_full_name;
4397   StringPiece placeholder_name;
4398   const std::string* placeholder_package;
4399 
4400   if (!ValidateQualifiedName(name)) return kNullSymbol;
4401   if (name[0] == '.') {
4402     // Fully-qualified.
4403     placeholder_full_name = name.substr(1);
4404   } else {
4405     placeholder_full_name = name;
4406   }
4407 
4408   std::string::size_type dotpos = placeholder_full_name.find_last_of('.');
4409   if (dotpos != std::string::npos) {
4410     placeholder_package =
4411         tables_->AllocateString(placeholder_full_name.substr(0, dotpos));
4412     placeholder_name = placeholder_full_name.substr(dotpos + 1);
4413   } else {
4414     placeholder_package = &internal::GetEmptyString();
4415     placeholder_name = placeholder_full_name;
4416   }
4417 
4418   // Create the placeholders.
4419   FileDescriptor* placeholder_file = NewPlaceholderFileWithMutexHeld(
4420       StrCat(placeholder_full_name, ".placeholder.proto"));
4421   placeholder_file->package_ = placeholder_package;
4422 
4423   if (placeholder_type == PLACEHOLDER_ENUM) {
4424     placeholder_file->enum_type_count_ = 1;
4425     placeholder_file->enum_types_ = tables_->AllocateArray<EnumDescriptor>(1);
4426 
4427     EnumDescriptor* placeholder_enum = &placeholder_file->enum_types_[0];
4428     memset(static_cast<void*>(placeholder_enum), 0, sizeof(*placeholder_enum));
4429 
4430     placeholder_enum->all_names_ =
4431         tables_->AllocateStringArray(placeholder_name, placeholder_full_name);
4432     placeholder_enum->file_ = placeholder_file;
4433     placeholder_enum->options_ = &EnumOptions::default_instance();
4434     placeholder_enum->is_placeholder_ = true;
4435     placeholder_enum->is_unqualified_placeholder_ = (name[0] != '.');
4436 
4437     // Enums must have at least one value.
4438     placeholder_enum->value_count_ = 1;
4439     placeholder_enum->values_ = tables_->AllocateArray<EnumValueDescriptor>(1);
4440 
4441     EnumValueDescriptor* placeholder_value = &placeholder_enum->values_[0];
4442     memset(static_cast<void*>(placeholder_value), 0,
4443            sizeof(*placeholder_value));
4444 
4445     // Note that enum value names are siblings of their type, not children.
4446     placeholder_value->all_names_ = tables_->AllocateStringArray(
4447         "PLACEHOLDER_VALUE", placeholder_package->empty()
4448                                  ? "PLACEHOLDER_VALUE"
4449                                  : *placeholder_package + ".PLACEHOLDER_VALUE");
4450 
4451     placeholder_value->number_ = 0;
4452     placeholder_value->type_ = placeholder_enum;
4453     placeholder_value->options_ = &EnumValueOptions::default_instance();
4454 
4455     return Symbol(placeholder_enum);
4456   } else {
4457     placeholder_file->message_type_count_ = 1;
4458     placeholder_file->message_types_ = tables_->AllocateArray<Descriptor>(1);
4459 
4460     Descriptor* placeholder_message = &placeholder_file->message_types_[0];
4461     memset(static_cast<void*>(placeholder_message), 0,
4462            sizeof(*placeholder_message));
4463 
4464     placeholder_message->all_names_ =
4465         tables_->AllocateStringArray(placeholder_name, placeholder_full_name);
4466     placeholder_message->file_ = placeholder_file;
4467     placeholder_message->options_ = &MessageOptions::default_instance();
4468     placeholder_message->is_placeholder_ = true;
4469     placeholder_message->is_unqualified_placeholder_ = (name[0] != '.');
4470 
4471     if (placeholder_type == PLACEHOLDER_EXTENDABLE_MESSAGE) {
4472       placeholder_message->extension_range_count_ = 1;
4473       placeholder_message->extension_ranges_ =
4474           tables_->AllocateArray<Descriptor::ExtensionRange>(1);
4475       placeholder_message->extension_ranges_->start = 1;
4476       // kMaxNumber + 1 because ExtensionRange::end is exclusive.
4477       placeholder_message->extension_ranges_->end =
4478           FieldDescriptor::kMaxNumber + 1;
4479       placeholder_message->extension_ranges_->options_ = nullptr;
4480     }
4481 
4482     return Symbol(placeholder_message);
4483   }
4484 }
4485 
NewPlaceholderFile(StringPiece name) const4486 FileDescriptor* DescriptorPool::NewPlaceholderFile(
4487     StringPiece name) const {
4488   MutexLockMaybe lock(mutex_);
4489   return NewPlaceholderFileWithMutexHeld(name);
4490 }
4491 
NewPlaceholderFileWithMutexHeld(StringPiece name) const4492 FileDescriptor* DescriptorPool::NewPlaceholderFileWithMutexHeld(
4493     StringPiece name) const {
4494   if (mutex_) {
4495     mutex_->AssertHeld();
4496   }
4497   FileDescriptor* placeholder = tables_->Allocate<FileDescriptor>();
4498   memset(static_cast<void*>(placeholder), 0, sizeof(*placeholder));
4499 
4500   placeholder->name_ = tables_->AllocateString(name);
4501   placeholder->package_ = &internal::GetEmptyString();
4502   placeholder->pool_ = this;
4503   placeholder->options_ = &FileOptions::default_instance();
4504   placeholder->tables_ = &FileDescriptorTables::GetEmptyInstance();
4505   placeholder->source_code_info_ = &SourceCodeInfo::default_instance();
4506   placeholder->is_placeholder_ = true;
4507   placeholder->syntax_ = FileDescriptor::SYNTAX_UNKNOWN;
4508   placeholder->finished_building_ = true;
4509   // All other fields are zero or nullptr.
4510 
4511   return placeholder;
4512 }
4513 
AddSymbol(const std::string & full_name,const void * parent,const std::string & name,const Message & proto,Symbol symbol)4514 bool DescriptorBuilder::AddSymbol(const std::string& full_name,
4515                                   const void* parent, const std::string& name,
4516                                   const Message& proto, Symbol symbol) {
4517   // If the caller passed nullptr for the parent, the symbol is at file scope.
4518   // Use its file as the parent instead.
4519   if (parent == nullptr) parent = file_;
4520 
4521   if (full_name.find('\0') != std::string::npos) {
4522     AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4523              "\"" + full_name + "\" contains null character.");
4524     return false;
4525   }
4526   if (tables_->AddSymbol(full_name, symbol)) {
4527     if (!file_tables_->AddAliasUnderParent(parent, name, symbol)) {
4528       // This is only possible if there was already an error adding something of
4529       // the same name.
4530       if (!had_errors_) {
4531         GOOGLE_LOG(DFATAL) << "\"" << full_name
4532                     << "\" not previously defined in "
4533                        "symbols_by_name_, but was defined in "
4534                        "symbols_by_parent_; this shouldn't be possible.";
4535       }
4536       return false;
4537     }
4538     return true;
4539   } else {
4540     const FileDescriptor* other_file = tables_->FindSymbol(full_name).GetFile();
4541     if (other_file == file_) {
4542       std::string::size_type dot_pos = full_name.find_last_of('.');
4543       if (dot_pos == std::string::npos) {
4544         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4545                  "\"" + full_name + "\" is already defined.");
4546       } else {
4547         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4548                  "\"" + full_name.substr(dot_pos + 1) +
4549                      "\" is already defined in \"" +
4550                      full_name.substr(0, dot_pos) + "\".");
4551       }
4552     } else {
4553       // Symbol seems to have been defined in a different file.
4554       AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4555                "\"" + full_name + "\" is already defined in file \"" +
4556                    (other_file == nullptr ? "null" : other_file->name()) +
4557                    "\".");
4558     }
4559     return false;
4560   }
4561 }
4562 
AddPackage(const std::string & name,const Message & proto,FileDescriptor * file)4563 void DescriptorBuilder::AddPackage(const std::string& name,
4564                                    const Message& proto, FileDescriptor* file) {
4565   if (name.find('\0') != std::string::npos) {
4566     AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
4567              "\"" + name + "\" contains null character.");
4568     return;
4569   }
4570 
4571   Symbol existing_symbol = tables_->FindSymbol(name);
4572   // It's OK to redefine a package.
4573   if (existing_symbol.IsNull()) {
4574     auto* package = tables_->AllocateArray<Symbol::Package>(1);
4575     // If the name is the package name, then it is already in the arena.
4576     // If not, copy it there. It came from the call to AddPackage below.
4577     package->name =
4578         &name == &file->package() ? &name : tables_->AllocateString(name);
4579     package->file = file;
4580     tables_->AddSymbol(*package->name, Symbol(package));
4581     // Also add parent package, if any.
4582     std::string::size_type dot_pos = name.find_last_of('.');
4583     if (dot_pos == std::string::npos) {
4584       // No parents.
4585       ValidateSymbolName(name, name, proto);
4586     } else {
4587       // Has parent.
4588       AddPackage(name.substr(0, dot_pos), proto, file);
4589       ValidateSymbolName(name.substr(dot_pos + 1), name, proto);
4590     }
4591   } else if (existing_symbol.type() != Symbol::PACKAGE) {
4592     // Symbol seems to have been defined in a different file.
4593     AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
4594              "\"" + name +
4595                  "\" is already defined (as something other than "
4596                  "a package) in file \"" +
4597                  existing_symbol.GetFile()->name() + "\".");
4598   }
4599 }
4600 
ValidateSymbolName(const std::string & name,const std::string & full_name,const Message & proto)4601 void DescriptorBuilder::ValidateSymbolName(const std::string& name,
4602                                            const std::string& full_name,
4603                                            const Message& proto) {
4604   if (name.empty()) {
4605     AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4606              "Missing name.");
4607   } else {
4608     for (char character : name) {
4609       // I don't trust isalnum() due to locales.  :(
4610       if ((character < 'a' || 'z' < character) &&
4611           (character < 'A' || 'Z' < character) &&
4612           (character < '0' || '9' < character) && (character != '_')) {
4613         AddError(full_name, proto, DescriptorPool::ErrorCollector::NAME,
4614                  "\"" + name + "\" is not a valid identifier.");
4615       }
4616     }
4617   }
4618 }
4619 
4620 // -------------------------------------------------------------------
4621 
4622 // This generic implementation is good for all descriptors except
4623 // FileDescriptor.
4624 template <class DescriptorT>
AllocateOptions(const typename DescriptorT::OptionsType & orig_options,DescriptorT * descriptor,int options_field_tag,const std::string & option_name)4625 void DescriptorBuilder::AllocateOptions(
4626     const typename DescriptorT::OptionsType& orig_options,
4627     DescriptorT* descriptor, int options_field_tag,
4628     const std::string& option_name) {
4629   std::vector<int> options_path;
4630   descriptor->GetLocationPath(&options_path);
4631   options_path.push_back(options_field_tag);
4632   AllocateOptionsImpl(descriptor->full_name(), descriptor->full_name(),
4633                       orig_options, descriptor, options_path, option_name);
4634 }
4635 
4636 // We specialize for FileDescriptor.
AllocateOptions(const FileOptions & orig_options,FileDescriptor * descriptor)4637 void DescriptorBuilder::AllocateOptions(const FileOptions& orig_options,
4638                                         FileDescriptor* descriptor) {
4639   std::vector<int> options_path;
4640   options_path.push_back(FileDescriptorProto::kOptionsFieldNumber);
4641   // We add the dummy token so that LookupSymbol does the right thing.
4642   AllocateOptionsImpl(descriptor->package() + ".dummy", descriptor->name(),
4643                       orig_options, descriptor, options_path,
4644                       "google.protobuf.FileOptions");
4645 }
4646 
4647 template <class DescriptorT>
AllocateOptionsImpl(const std::string & name_scope,const std::string & element_name,const typename DescriptorT::OptionsType & orig_options,DescriptorT * descriptor,const std::vector<int> & options_path,const std::string & option_name)4648 void DescriptorBuilder::AllocateOptionsImpl(
4649     const std::string& name_scope, const std::string& element_name,
4650     const typename DescriptorT::OptionsType& orig_options,
4651     DescriptorT* descriptor, const std::vector<int>& options_path,
4652     const std::string& option_name) {
4653   // We need to use a dummy pointer to work around a bug in older versions of
4654   // GCC.  Otherwise, the following two lines could be replaced with:
4655   //   typename DescriptorT::OptionsType* options =
4656   //       tables_->AllocateMessage<typename DescriptorT::OptionsType>();
4657   typename DescriptorT::OptionsType* const dummy = nullptr;
4658   typename DescriptorT::OptionsType* options = tables_->AllocateMessage(dummy);
4659 
4660   if (!orig_options.IsInitialized()) {
4661     AddError(name_scope + "." + element_name, orig_options,
4662              DescriptorPool::ErrorCollector::OPTION_NAME,
4663              "Uninterpreted option is missing name or value.");
4664     return;
4665   }
4666 
4667   // Avoid using MergeFrom()/CopyFrom() in this class to make it -fno-rtti
4668   // friendly. Without RTTI, MergeFrom() and CopyFrom() will fallback to the
4669   // reflection based method, which requires the Descriptor. However, we are in
4670   // the middle of building the descriptors, thus the deadlock.
4671   options->ParseFromString(orig_options.SerializeAsString());
4672   descriptor->options_ = options;
4673 
4674   // Don't add to options_to_interpret_ unless there were uninterpreted
4675   // options.  This not only avoids unnecessary work, but prevents a
4676   // bootstrapping problem when building descriptors for descriptor.proto.
4677   // descriptor.proto does not contain any uninterpreted options, but
4678   // attempting to interpret options anyway will cause
4679   // OptionsType::GetDescriptor() to be called which may then deadlock since
4680   // we're still trying to build it.
4681   if (options->uninterpreted_option_size() > 0) {
4682     options_to_interpret_.push_back(OptionsToInterpret(
4683         name_scope, element_name, options_path, &orig_options, options));
4684   }
4685 
4686   // If the custom option is in unknown fields, no need to interpret it.
4687   // Remove the dependency file from unused_dependency.
4688   const UnknownFieldSet& unknown_fields = orig_options.unknown_fields();
4689   if (!unknown_fields.empty()) {
4690     // Can not use options->GetDescriptor() which may case deadlock.
4691     Symbol msg_symbol = tables_->FindSymbol(option_name);
4692     if (msg_symbol.type() == Symbol::MESSAGE) {
4693       for (int i = 0; i < unknown_fields.field_count(); ++i) {
4694         assert_mutex_held(pool_);
4695         const FieldDescriptor* field =
4696             pool_->InternalFindExtensionByNumberNoLock(
4697                 msg_symbol.descriptor(), unknown_fields.field(i).number());
4698         if (field) {
4699           unused_dependency_.erase(field->file());
4700         }
4701       }
4702     }
4703   }
4704 }
4705 
// A common pattern:  We want to convert a repeated field in the descriptor
// to an array of values, calling some method to build each value.
//
// For the repeated field NAME of INPUT this macro:
//   1. stores INPUT.NAME_size() in OUTPUT->NAME_count_,
//   2. calls AllocateArray() to allocate OUTPUT->NAMEs_ with that many slots,
//   3. calls METHOD(INPUT.NAME(i), PARENT, &OUTPUT->NAMEs_[i]) for each i.
//
// Note that it expands to several statements (it is not wrapped in
// do { } while (0)), so it must not be used as the unbraced body of an
// if/else or loop.
#define BUILD_ARRAY(INPUT, OUTPUT, NAME, METHOD, PARENT) \
  OUTPUT->NAME##_count_ = INPUT.NAME##_size();           \
  AllocateArray(INPUT.NAME##_size(), &OUTPUT->NAME##s_); \
  for (int i = 0; i < INPUT.NAME##_size(); i++) {        \
    METHOD(INPUT.NAME(i), PARENT, OUTPUT->NAME##s_ + i); \
  }
4714 
AddRecursiveImportError(const FileDescriptorProto & proto,int from_here)4715 void DescriptorBuilder::AddRecursiveImportError(
4716     const FileDescriptorProto& proto, int from_here) {
4717   std::string error_message("File recursively imports itself: ");
4718   for (size_t i = from_here; i < tables_->pending_files_.size(); i++) {
4719     error_message.append(tables_->pending_files_[i]);
4720     error_message.append(" -> ");
4721   }
4722   error_message.append(proto.name());
4723 
4724   if (static_cast<size_t>(from_here) < tables_->pending_files_.size() - 1) {
4725     AddError(tables_->pending_files_[from_here + 1], proto,
4726              DescriptorPool::ErrorCollector::IMPORT, error_message);
4727   } else {
4728     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::IMPORT,
4729              error_message);
4730   }
4731 }
4732 
AddTwiceListedError(const FileDescriptorProto & proto,int index)4733 void DescriptorBuilder::AddTwiceListedError(const FileDescriptorProto& proto,
4734                                             int index) {
4735   AddError(proto.dependency(index), proto,
4736            DescriptorPool::ErrorCollector::IMPORT,
4737            "Import \"" + proto.dependency(index) + "\" was listed twice.");
4738 }
4739 
AddImportError(const FileDescriptorProto & proto,int index)4740 void DescriptorBuilder::AddImportError(const FileDescriptorProto& proto,
4741                                        int index) {
4742   std::string message;
4743   if (pool_->fallback_database_ == nullptr) {
4744     message = "Import \"" + proto.dependency(index) + "\" has not been loaded.";
4745   } else {
4746     message = "Import \"" + proto.dependency(index) +
4747               "\" was not found or had errors.";
4748   }
4749   AddError(proto.dependency(index), proto,
4750            DescriptorPool::ErrorCollector::IMPORT, message);
4751 }
4752 
ExistingFileMatchesProto(const FileDescriptor * existing_file,const FileDescriptorProto & proto)4753 static bool ExistingFileMatchesProto(const FileDescriptor* existing_file,
4754                                      const FileDescriptorProto& proto) {
4755   FileDescriptorProto existing_proto;
4756   existing_file->CopyTo(&existing_proto);
4757   // TODO(liujisi): Remove it when CopyTo supports copying syntax params when
4758   // syntax="proto2".
4759   if (existing_file->syntax() == FileDescriptor::SYNTAX_PROTO2 &&
4760       proto.has_syntax()) {
4761     existing_proto.set_syntax(
4762         existing_file->SyntaxName(existing_file->syntax()));
4763   }
4764 
4765   return existing_proto.SerializeAsString() == proto.SerializeAsString();
4766 }
4767 
BuildFile(const FileDescriptorProto & proto)4768 const FileDescriptor* DescriptorBuilder::BuildFile(
4769     const FileDescriptorProto& proto) {
4770   filename_ = proto.name();
4771 
4772   // Check if the file already exists and is identical to the one being built.
4773   // Note:  This only works if the input is canonical -- that is, it
4774   //   fully-qualifies all type names, has no UninterpretedOptions, etc.
4775   //   This is fine, because this idempotency "feature" really only exists to
4776   //   accommodate one hack in the proto1->proto2 migration layer.
4777   const FileDescriptor* existing_file = tables_->FindFile(filename_);
4778   if (existing_file != nullptr) {
4779     // File already in pool.  Compare the existing one to the input.
4780     if (ExistingFileMatchesProto(existing_file, proto)) {
4781       // They're identical.  Return the existing descriptor.
4782       return existing_file;
4783     }
4784 
4785     // Not a match.  The error will be detected and handled later.
4786   }
4787 
4788   // Check to see if this file is already on the pending files list.
4789   // TODO(kenton):  Allow recursive imports?  It may not work with some
4790   //   (most?) programming languages.  E.g., in C++, a forward declaration
4791   //   of a type is not sufficient to allow it to be used even in a
4792   //   generated header file due to inlining.  This could perhaps be
4793   //   worked around using tricks involving inserting #include statements
4794   //   mid-file, but that's pretty ugly, and I'm pretty sure there are
4795   //   some languages out there that do not allow recursive dependencies
4796   //   at all.
4797   for (size_t i = 0; i < tables_->pending_files_.size(); i++) {
4798     if (tables_->pending_files_[i] == proto.name()) {
4799       AddRecursiveImportError(proto, i);
4800       return nullptr;
4801     }
4802   }
4803 
4804   // If we have a fallback_database_, and we aren't doing lazy import building,
4805   // attempt to load all dependencies now, before checkpointing tables_.  This
4806   // avoids confusion with recursive checkpoints.
4807   if (!pool_->lazily_build_dependencies_) {
4808     if (pool_->fallback_database_ != nullptr) {
4809       tables_->pending_files_.push_back(proto.name());
4810       for (int i = 0; i < proto.dependency_size(); i++) {
4811         if (tables_->FindFile(proto.dependency(i)) == nullptr &&
4812             (pool_->underlay_ == nullptr ||
4813              pool_->underlay_->FindFileByName(proto.dependency(i)) ==
4814                  nullptr)) {
4815           // We don't care what this returns since we'll find out below anyway.
4816           pool_->TryFindFileInFallbackDatabase(proto.dependency(i));
4817         }
4818       }
4819       tables_->pending_files_.pop_back();
4820     }
4821   }
4822 
4823   // Checkpoint the tables so that we can roll back if something goes wrong.
4824   tables_->AddCheckpoint();
4825 
4826   FileDescriptor* result = BuildFileImpl(proto);
4827 
4828   file_tables_->FinalizeTables();
4829   if (result) {
4830     tables_->ClearLastCheckpoint();
4831     result->finished_building_ = true;
4832   } else {
4833     tables_->RollbackToLastCheckpoint();
4834   }
4835 
4836   return result;
4837 }
4838 
BuildFileImpl(const FileDescriptorProto & proto)4839 FileDescriptor* DescriptorBuilder::BuildFileImpl(
4840     const FileDescriptorProto& proto) {
4841   FileDescriptor* result = tables_->Allocate<FileDescriptor>();
4842   file_ = result;
4843 
4844   result->is_placeholder_ = false;
4845   result->finished_building_ = false;
4846   SourceCodeInfo* info = nullptr;
4847   if (proto.has_source_code_info()) {
4848     info = tables_->AllocateMessage<SourceCodeInfo>();
4849     info->CopyFrom(proto.source_code_info());
4850     result->source_code_info_ = info;
4851   } else {
4852     result->source_code_info_ = &SourceCodeInfo::default_instance();
4853   }
4854 
4855   file_tables_ = tables_->AllocateFileTables();
4856   file_->tables_ = file_tables_;
4857 
4858   if (!proto.has_name()) {
4859     AddError("", proto, DescriptorPool::ErrorCollector::OTHER,
4860              "Missing field: FileDescriptorProto.name.");
4861   }
4862 
4863   // TODO(liujisi): Report error when the syntax is empty after all the protos
4864   // have added the syntax statement.
4865   if (proto.syntax().empty() || proto.syntax() == "proto2") {
4866     file_->syntax_ = FileDescriptor::SYNTAX_PROTO2;
4867   } else if (proto.syntax() == "proto3") {
4868     file_->syntax_ = FileDescriptor::SYNTAX_PROTO3;
4869   } else {
4870     file_->syntax_ = FileDescriptor::SYNTAX_UNKNOWN;
4871     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
4872              "Unrecognized syntax: " + proto.syntax());
4873   }
4874 
4875   result->name_ = tables_->AllocateString(proto.name());
4876   if (proto.has_package()) {
4877     result->package_ = tables_->AllocateString(proto.package());
4878   } else {
4879     // We cannot rely on proto.package() returning a valid string if
4880     // proto.has_package() is false, because we might be running at static
4881     // initialization time, in which case default values have not yet been
4882     // initialized.
4883     result->package_ = tables_->AllocateString("");
4884   }
4885   result->pool_ = pool_;
4886 
4887   if (result->name().find('\0') != std::string::npos) {
4888     AddError(result->name(), proto, DescriptorPool::ErrorCollector::NAME,
4889              "\"" + result->name() + "\" contains null character.");
4890     return nullptr;
4891   }
4892 
4893   // Add to tables.
4894   if (!tables_->AddFile(result)) {
4895     AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
4896              "A file with this name is already in the pool.");
4897     // Bail out early so that if this is actually the exact same file, we
4898     // don't end up reporting that every single symbol is already defined.
4899     return nullptr;
4900   }
4901   if (!result->package().empty()) {
4902     AddPackage(result->package(), proto, result);
4903   }
4904 
4905   // Make sure all dependencies are loaded.
4906   std::set<std::string> seen_dependencies;
4907   result->dependency_count_ = proto.dependency_size();
4908   result->dependencies_ =
4909       tables_->AllocateArray<const FileDescriptor*>(proto.dependency_size());
4910   result->dependencies_once_ = nullptr;
4911   unused_dependency_.clear();
4912   std::set<int> weak_deps;
4913   for (int i = 0; i < proto.weak_dependency_size(); ++i) {
4914     weak_deps.insert(proto.weak_dependency(i));
4915   }
4916   for (int i = 0; i < proto.dependency_size(); i++) {
4917     if (!seen_dependencies.insert(proto.dependency(i)).second) {
4918       AddTwiceListedError(proto, i);
4919     }
4920 
4921     const FileDescriptor* dependency = tables_->FindFile(proto.dependency(i));
4922     if (dependency == nullptr && pool_->underlay_ != nullptr) {
4923       dependency = pool_->underlay_->FindFileByName(proto.dependency(i));
4924     }
4925 
4926     if (dependency == result) {
4927       // Recursive import.  dependency/result is not fully initialized, and it's
4928       // dangerous to try to do anything with it.  The recursive import error
4929       // will be detected and reported in DescriptorBuilder::BuildFile().
4930       return nullptr;
4931     }
4932 
4933     if (dependency == nullptr) {
4934       if (!pool_->lazily_build_dependencies_) {
4935         if (pool_->allow_unknown_ ||
4936             (!pool_->enforce_weak_ && weak_deps.find(i) != weak_deps.end())) {
4937           dependency =
4938               pool_->NewPlaceholderFileWithMutexHeld(proto.dependency(i));
4939         } else {
4940           AddImportError(proto, i);
4941         }
4942       }
4943     } else {
4944       // Add to unused_dependency_ to track unused imported files.
4945       // Note: do not track unused imported files for public import.
4946       if (pool_->enforce_dependencies_ &&
4947           (pool_->unused_import_track_files_.find(proto.name()) !=
4948            pool_->unused_import_track_files_.end()) &&
4949           (dependency->public_dependency_count() == 0)) {
4950         unused_dependency_.insert(dependency);
4951       }
4952     }
4953 
4954     result->dependencies_[i] = dependency;
4955     if (pool_->lazily_build_dependencies_ && !dependency) {
4956       if (result->dependencies_once_ == nullptr) {
4957         result->dependencies_once_ = tables_->AllocateLazyInit();
4958         result->dependencies_once_->file.dependencies_names =
4959             tables_->AllocateArray<const std::string*>(proto.dependency_size());
4960         if (proto.dependency_size() > 0) {
4961           std::fill_n(result->dependencies_once_->file.dependencies_names,
4962                       proto.dependency_size(), nullptr);
4963         }
4964       }
4965 
4966       result->dependencies_once_->file.dependencies_names[i] =
4967           tables_->AllocateString(proto.dependency(i));
4968     }
4969   }
4970 
4971   // Check public dependencies.
4972   int public_dependency_count = 0;
4973   result->public_dependencies_ =
4974       tables_->AllocateArray<int>(proto.public_dependency_size());
4975   for (int i = 0; i < proto.public_dependency_size(); i++) {
4976     // Only put valid public dependency indexes.
4977     int index = proto.public_dependency(i);
4978     if (index >= 0 && index < proto.dependency_size()) {
4979       result->public_dependencies_[public_dependency_count++] = index;
4980       // Do not track unused imported files for public import.
4981       // Calling dependency(i) builds that file when doing lazy imports,
4982       // need to avoid doing this. Unused dependency detection isn't done
4983       // when building lazily, anyways.
4984       if (!pool_->lazily_build_dependencies_) {
4985         unused_dependency_.erase(result->dependency(index));
4986       }
4987     } else {
4988       AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
4989                "Invalid public dependency index.");
4990     }
4991   }
4992   result->public_dependency_count_ = public_dependency_count;
4993 
4994   // Build dependency set
4995   dependencies_.clear();
4996   // We don't/can't do proper dependency error checking when
4997   // lazily_build_dependencies_, and calling dependency(i) will force
4998   // a dependency to be built, which we don't want.
4999   if (!pool_->lazily_build_dependencies_) {
5000     for (int i = 0; i < result->dependency_count(); i++) {
5001       RecordPublicDependencies(result->dependency(i));
5002     }
5003   }
5004 
5005   // Check weak dependencies.
5006   int weak_dependency_count = 0;
5007   result->weak_dependencies_ =
5008       tables_->AllocateArray<int>(proto.weak_dependency_size());
5009   for (int i = 0; i < proto.weak_dependency_size(); i++) {
5010     int index = proto.weak_dependency(i);
5011     if (index >= 0 && index < proto.dependency_size()) {
5012       result->weak_dependencies_[weak_dependency_count++] = index;
5013     } else {
5014       AddError(proto.name(), proto, DescriptorPool::ErrorCollector::OTHER,
5015                "Invalid weak dependency index.");
5016     }
5017   }
5018   result->weak_dependency_count_ = weak_dependency_count;
5019 
5020   // Convert children.
5021   BUILD_ARRAY(proto, result, message_type, BuildMessage, nullptr);
5022   BUILD_ARRAY(proto, result, enum_type, BuildEnum, nullptr);
5023   BUILD_ARRAY(proto, result, service, BuildService, nullptr);
5024   BUILD_ARRAY(proto, result, extension, BuildExtension, nullptr);
5025 
5026   // Copy options.
5027   result->options_ = nullptr;  // Set to default_instance later if necessary.
5028   if (proto.has_options()) {
5029     AllocateOptions(proto.options(), result);
5030   }
5031 
5032   // Note that the following steps must occur in exactly the specified order.
5033 
5034   // Cross-link.
5035   CrossLinkFile(result, proto);
5036 
5037   // Interpret any remaining uninterpreted options gathered into
5038   // options_to_interpret_ during descriptor building.  Cross-linking has made
5039   // extension options known, so all interpretations should now succeed.
5040   if (!had_errors_) {
5041     OptionInterpreter option_interpreter(this);
5042     for (std::vector<OptionsToInterpret>::iterator iter =
5043              options_to_interpret_.begin();
5044          iter != options_to_interpret_.end(); ++iter) {
5045       option_interpreter.InterpretOptions(&(*iter));
5046     }
5047     options_to_interpret_.clear();
5048     if (info != nullptr) {
5049       option_interpreter.UpdateSourceCodeInfo(info);
5050     }
5051   }
5052 
5053   // Validate options. See comments at InternalSetLazilyBuildDependencies about
5054   // error checking and lazy import building.
5055   if (!had_errors_ && !pool_->lazily_build_dependencies_) {
5056     ValidateFileOptions(result, proto);
5057   }
5058 
5059   // Additional naming conflict check for map entry types. Only need to check
5060   // this if there are already errors.
5061   if (had_errors_) {
5062     for (int i = 0; i < proto.message_type_size(); ++i) {
5063       DetectMapConflicts(result->message_type(i), proto.message_type(i));
5064     }
5065   }
5066 
5067 
5068   // Again, see comments at InternalSetLazilyBuildDependencies about error
5069   // checking. Also, don't log unused dependencies if there were previous
5070   // errors, since the results might be inaccurate.
5071   if (!had_errors_ && !unused_dependency_.empty() &&
5072       !pool_->lazily_build_dependencies_) {
5073     LogUnusedDependency(proto, result);
5074   }
5075 
5076   if (had_errors_) {
5077     return nullptr;
5078   } else {
5079     return result;
5080   }
5081 }
5082 
5083 
AllocateNameStrings(const std::string & scope,const std::string & proto_name)5084 const std::string* DescriptorBuilder::AllocateNameStrings(
5085     const std::string& scope, const std::string& proto_name) {
5086   if (scope.empty()) {
5087     return tables_->AllocateStringArray(proto_name, proto_name);
5088   } else {
5089     return tables_->AllocateStringArray(proto_name,
5090                                         StrCat(scope, ".", proto_name));
5091   }
5092 }
5093 
BuildMessage(const DescriptorProto & proto,const Descriptor * parent,Descriptor * result)5094 void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
5095                                      const Descriptor* parent,
5096                                      Descriptor* result) {
5097   const std::string& scope =
5098       (parent == nullptr) ? file_->package() : parent->full_name();
5099   result->all_names_ = AllocateNameStrings(scope, proto.name());
5100   ValidateSymbolName(proto.name(), result->full_name(), proto);
5101 
5102   result->file_ = file_;
5103   result->containing_type_ = parent;
5104   result->is_placeholder_ = false;
5105   result->is_unqualified_placeholder_ = false;
5106   result->well_known_type_ = Descriptor::WELLKNOWNTYPE_UNSPECIFIED;
5107 
5108   auto it = pool_->tables_->well_known_types_.find(result->full_name());
5109   if (it != pool_->tables_->well_known_types_.end()) {
5110     result->well_known_type_ = it->second;
5111   }
5112 
5113   // Build oneofs first so that fields and extension ranges can refer to them.
5114   BUILD_ARRAY(proto, result, oneof_decl, BuildOneof, result);
5115   BUILD_ARRAY(proto, result, field, BuildField, result);
5116   BUILD_ARRAY(proto, result, nested_type, BuildMessage, result);
5117   BUILD_ARRAY(proto, result, enum_type, BuildEnum, result);
5118   BUILD_ARRAY(proto, result, extension_range, BuildExtensionRange, result);
5119   BUILD_ARRAY(proto, result, extension, BuildExtension, result);
5120   BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
5121 
5122   // Copy reserved names.
5123   int reserved_name_count = proto.reserved_name_size();
5124   result->reserved_name_count_ = reserved_name_count;
5125   result->reserved_names_ =
5126       tables_->AllocateArray<const std::string*>(reserved_name_count);
5127   for (int i = 0; i < reserved_name_count; ++i) {
5128     result->reserved_names_[i] =
5129         tables_->AllocateString(proto.reserved_name(i));
5130   }
5131 
5132   // Copy options.
5133   result->options_ = nullptr;  // Set to default_instance later if necessary.
5134   if (proto.has_options()) {
5135     AllocateOptions(proto.options(), result,
5136                     DescriptorProto::kOptionsFieldNumber,
5137                     "google.protobuf.MessageOptions");
5138   }
5139 
5140   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5141 
5142   for (int i = 0; i < proto.reserved_range_size(); i++) {
5143     const DescriptorProto_ReservedRange& range1 = proto.reserved_range(i);
5144     for (int j = i + 1; j < proto.reserved_range_size(); j++) {
5145       const DescriptorProto_ReservedRange& range2 = proto.reserved_range(j);
5146       if (range1.end() > range2.start() && range2.end() > range1.start()) {
5147         AddError(result->full_name(), proto.reserved_range(i),
5148                  DescriptorPool::ErrorCollector::NUMBER,
5149                  strings::Substitute("Reserved range $0 to $1 overlaps with "
5150                                   "already-defined range $2 to $3.",
5151                                   range2.start(), range2.end() - 1,
5152                                   range1.start(), range1.end() - 1));
5153       }
5154     }
5155   }
5156 
5157   HASH_SET<std::string> reserved_name_set;
5158   for (int i = 0; i < proto.reserved_name_size(); i++) {
5159     const std::string& name = proto.reserved_name(i);
5160     if (reserved_name_set.find(name) == reserved_name_set.end()) {
5161       reserved_name_set.insert(name);
5162     } else {
5163       AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
5164                strings::Substitute("Field name \"$0\" is reserved multiple times.",
5165                                 name));
5166     }
5167   }
5168 
5169 
5170   for (int i = 0; i < result->field_count(); i++) {
5171     const FieldDescriptor* field = result->field(i);
5172     for (int j = 0; j < result->extension_range_count(); j++) {
5173       const Descriptor::ExtensionRange* range = result->extension_range(j);
5174       if (range->start <= field->number() && field->number() < range->end) {
5175         AddError(
5176             field->full_name(), proto.extension_range(j),
5177             DescriptorPool::ErrorCollector::NUMBER,
5178             strings::Substitute(
5179                 "Extension range $0 to $1 includes field \"$2\" ($3).",
5180                 range->start, range->end - 1, field->name(), field->number()));
5181       }
5182     }
5183     for (int j = 0; j < result->reserved_range_count(); j++) {
5184       const Descriptor::ReservedRange* range = result->reserved_range(j);
5185       if (range->start <= field->number() && field->number() < range->end) {
5186         AddError(field->full_name(), proto.reserved_range(j),
5187                  DescriptorPool::ErrorCollector::NUMBER,
5188                  strings::Substitute("Field \"$0\" uses reserved number $1.",
5189                                   field->name(), field->number()));
5190       }
5191     }
5192     if (reserved_name_set.find(field->name()) != reserved_name_set.end()) {
5193       AddError(
5194           field->full_name(), proto.field(i),
5195           DescriptorPool::ErrorCollector::NAME,
5196           strings::Substitute("Field name \"$0\" is reserved.", field->name()));
5197     }
5198 
5199   }
5200 
5201   // Check that extension ranges don't overlap and don't include
5202   // reserved field numbers or names.
5203   for (int i = 0; i < result->extension_range_count(); i++) {
5204     const Descriptor::ExtensionRange* range1 = result->extension_range(i);
5205     for (int j = 0; j < result->reserved_range_count(); j++) {
5206       const Descriptor::ReservedRange* range2 = result->reserved_range(j);
5207       if (range1->end > range2->start && range2->end > range1->start) {
5208         AddError(result->full_name(), proto.extension_range(i),
5209                  DescriptorPool::ErrorCollector::NUMBER,
5210                  strings::Substitute("Extension range $0 to $1 overlaps with "
5211                                   "reserved range $2 to $3.",
5212                                   range1->start, range1->end - 1, range2->start,
5213                                   range2->end - 1));
5214       }
5215     }
5216     for (int j = i + 1; j < result->extension_range_count(); j++) {
5217       const Descriptor::ExtensionRange* range2 = result->extension_range(j);
5218       if (range1->end > range2->start && range2->end > range1->start) {
5219         AddError(result->full_name(), proto.extension_range(i),
5220                  DescriptorPool::ErrorCollector::NUMBER,
5221                  strings::Substitute("Extension range $0 to $1 overlaps with "
5222                                   "already-defined range $2 to $3.",
5223                                   range2->start, range2->end - 1, range1->start,
5224                                   range1->end - 1));
5225       }
5226     }
5227   }
5228 }
5229 
BuildFieldOrExtension(const FieldDescriptorProto & proto,Descriptor * parent,FieldDescriptor * result,bool is_extension)5230 void DescriptorBuilder::BuildFieldOrExtension(const FieldDescriptorProto& proto,
5231                                               Descriptor* parent,
5232                                               FieldDescriptor* result,
5233                                               bool is_extension) {
5234   const std::string& scope =
5235       (parent == nullptr) ? file_->package() : parent->full_name();
5236 
5237   // We allocate all names in a single array, and dedup them.
5238   // We remember the indices for the potentially deduped values.
5239   auto all_names = tables_->AllocateFieldNames(
5240       proto.name(), scope,
5241       proto.has_json_name() ? &proto.json_name() : nullptr);
5242   result->all_names_ = all_names.array;
5243   result->lowercase_name_index_ = all_names.lowercase_index;
5244   result->camelcase_name_index_ = all_names.camelcase_index;
5245   result->json_name_index_ = all_names.json_index;
5246 
5247   ValidateSymbolName(proto.name(), result->full_name(), proto);
5248 
5249   result->file_ = file_;
5250   result->number_ = proto.number();
5251   result->is_extension_ = is_extension;
5252   result->is_oneof_ = false;
5253   result->proto3_optional_ = proto.proto3_optional();
5254 
5255   if (proto.proto3_optional() &&
5256       file_->syntax() != FileDescriptor::SYNTAX_PROTO3) {
5257     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
5258              "The [proto3_optional=true] option may only be set on proto3"
5259              "fields, not " +
5260                  result->full_name());
5261   }
5262 
5263   result->has_json_name_ = proto.has_json_name();
5264 
5265   // Some compilers do not allow static_cast directly between two enum types,
5266   // so we must cast to int first.
5267   result->type_ = static_cast<FieldDescriptor::Type>(
5268       implicit_cast<int>(proto.type()));
5269   result->label_ = static_cast<FieldDescriptor::Label>(
5270       implicit_cast<int>(proto.label()));
5271 
5272   if (result->label_ == FieldDescriptor::LABEL_REQUIRED) {
5273     // An extension cannot have a required field (b/13365836).
5274     if (result->is_extension_) {
5275       AddError(result->full_name(), proto,
5276                // Error location `TYPE`: we would really like to indicate
5277                // `LABEL`, but the `ErrorLocation` enum has no entry for this,
5278                // and we don't necessarily know about all implementations of the
5279                // `ErrorCollector` interface to extend them to handle the new
5280                // error location type properly.
5281                DescriptorPool::ErrorCollector::TYPE,
5282                "The extension " + result->full_name() + " cannot be required.");
5283     }
5284   }
5285 
5286   // Some of these may be filled in when cross-linking.
5287   result->containing_type_ = nullptr;
5288   result->type_once_ = nullptr;
5289   result->default_value_enum_ = nullptr;
5290 
5291   result->has_default_value_ = proto.has_default_value();
5292   if (proto.has_default_value() && result->is_repeated()) {
5293     AddError(result->full_name(), proto,
5294              DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5295              "Repeated fields can't have default values.");
5296   }
5297 
5298   if (proto.has_type()) {
5299     if (proto.has_default_value()) {
5300       char* end_pos = nullptr;
5301       switch (result->cpp_type()) {
5302         case FieldDescriptor::CPPTYPE_INT32:
5303           result->default_value_int32_t_ =
5304               strtol(proto.default_value().c_str(), &end_pos, 0);
5305           break;
5306         case FieldDescriptor::CPPTYPE_INT64:
5307           result->default_value_int64_t_ =
5308               strto64(proto.default_value().c_str(), &end_pos, 0);
5309           break;
5310         case FieldDescriptor::CPPTYPE_UINT32:
5311           result->default_value_uint32_t_ =
5312               strtoul(proto.default_value().c_str(), &end_pos, 0);
5313           break;
5314         case FieldDescriptor::CPPTYPE_UINT64:
5315           result->default_value_uint64_t_ =
5316               strtou64(proto.default_value().c_str(), &end_pos, 0);
5317           break;
5318         case FieldDescriptor::CPPTYPE_FLOAT:
5319           if (proto.default_value() == "inf") {
5320             result->default_value_float_ =
5321                 std::numeric_limits<float>::infinity();
5322           } else if (proto.default_value() == "-inf") {
5323             result->default_value_float_ =
5324                 -std::numeric_limits<float>::infinity();
5325           } else if (proto.default_value() == "nan") {
5326             result->default_value_float_ =
5327                 std::numeric_limits<float>::quiet_NaN();
5328           } else {
5329             result->default_value_float_ = io::SafeDoubleToFloat(
5330                 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos));
5331           }
5332           break;
5333         case FieldDescriptor::CPPTYPE_DOUBLE:
5334           if (proto.default_value() == "inf") {
5335             result->default_value_double_ =
5336                 std::numeric_limits<double>::infinity();
5337           } else if (proto.default_value() == "-inf") {
5338             result->default_value_double_ =
5339                 -std::numeric_limits<double>::infinity();
5340           } else if (proto.default_value() == "nan") {
5341             result->default_value_double_ =
5342                 std::numeric_limits<double>::quiet_NaN();
5343           } else {
5344             result->default_value_double_ =
5345                 io::NoLocaleStrtod(proto.default_value().c_str(), &end_pos);
5346           }
5347           break;
5348         case FieldDescriptor::CPPTYPE_BOOL:
5349           if (proto.default_value() == "true") {
5350             result->default_value_bool_ = true;
5351           } else if (proto.default_value() == "false") {
5352             result->default_value_bool_ = false;
5353           } else {
5354             AddError(result->full_name(), proto,
5355                      DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5356                      "Boolean default must be true or false.");
5357           }
5358           break;
5359         case FieldDescriptor::CPPTYPE_ENUM:
5360           // This will be filled in when cross-linking.
5361           result->default_value_enum_ = nullptr;
5362           break;
5363         case FieldDescriptor::CPPTYPE_STRING:
5364           if (result->type() == FieldDescriptor::TYPE_BYTES) {
5365             result->default_value_string_ = tables_->AllocateString(
5366                 UnescapeCEscapeString(proto.default_value()));
5367           } else {
5368             result->default_value_string_ =
5369                 tables_->AllocateString(proto.default_value());
5370           }
5371           break;
5372         case FieldDescriptor::CPPTYPE_MESSAGE:
5373           AddError(result->full_name(), proto,
5374                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5375                    "Messages can't have default values.");
5376           result->has_default_value_ = false;
5377           result->default_generated_instance_ = nullptr;
5378           break;
5379       }
5380 
5381       if (end_pos != nullptr) {
5382         // end_pos is only set non-null by the parsers for numeric types,
5383         // above. This checks that the default was non-empty and had no extra
5384         // junk after the end of the number.
5385         if (proto.default_value().empty() || *end_pos != '\0') {
5386           AddError(result->full_name(), proto,
5387                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
5388                    "Couldn't parse default value \"" + proto.default_value() +
5389                        "\".");
5390         }
5391       }
5392     } else {
5393       // No explicit default value
5394       switch (result->cpp_type()) {
5395         case FieldDescriptor::CPPTYPE_INT32:
5396           result->default_value_int32_t_ = 0;
5397           break;
5398         case FieldDescriptor::CPPTYPE_INT64:
5399           result->default_value_int64_t_ = 0;
5400           break;
5401         case FieldDescriptor::CPPTYPE_UINT32:
5402           result->default_value_uint32_t_ = 0;
5403           break;
5404         case FieldDescriptor::CPPTYPE_UINT64:
5405           result->default_value_uint64_t_ = 0;
5406           break;
5407         case FieldDescriptor::CPPTYPE_FLOAT:
5408           result->default_value_float_ = 0.0f;
5409           break;
5410         case FieldDescriptor::CPPTYPE_DOUBLE:
5411           result->default_value_double_ = 0.0;
5412           break;
5413         case FieldDescriptor::CPPTYPE_BOOL:
5414           result->default_value_bool_ = false;
5415           break;
5416         case FieldDescriptor::CPPTYPE_ENUM:
5417           // This will be filled in when cross-linking.
5418           result->default_value_enum_ = nullptr;
5419           break;
5420         case FieldDescriptor::CPPTYPE_STRING:
5421           result->default_value_string_ = &internal::GetEmptyString();
5422           break;
5423         case FieldDescriptor::CPPTYPE_MESSAGE:
5424           result->default_generated_instance_ = nullptr;
5425           break;
5426       }
5427     }
5428   }
5429 
5430   if (result->number() <= 0) {
5431     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5432              "Field numbers must be positive integers.");
5433   } else if (!is_extension && result->number() > FieldDescriptor::kMaxNumber) {
5434     // Only validate that the number is within the valid field range if it is
5435     // not an extension. Since extension numbers are validated with the
5436     // extendee's valid set of extension numbers, and those are in turn
5437     // validated against the max allowed number, the check is unnecessary for
5438     // extension fields.
5439     // This avoids cross-linking issues that arise when attempting to check if
5440     // the extendee is a message_set_wire_format message, which has a higher max
5441     // on extension numbers.
5442     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5443              strings::Substitute("Field numbers cannot be greater than $0.",
5444                               FieldDescriptor::kMaxNumber));
5445   } else if (result->number() >= FieldDescriptor::kFirstReservedNumber &&
5446              result->number() <= FieldDescriptor::kLastReservedNumber) {
5447     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5448              strings::Substitute(
5449                  "Field numbers $0 through $1 are reserved for the protocol "
5450                  "buffer library implementation.",
5451                  FieldDescriptor::kFirstReservedNumber,
5452                  FieldDescriptor::kLastReservedNumber));
5453   }
5454 
5455   if (is_extension) {
5456     if (!proto.has_extendee()) {
5457       AddError(result->full_name(), proto,
5458                DescriptorPool::ErrorCollector::EXTENDEE,
5459                "FieldDescriptorProto.extendee not set for extension field.");
5460     }
5461 
5462     result->scope_.extension_scope = parent;
5463 
5464     if (proto.has_oneof_index()) {
5465       AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
5466                "FieldDescriptorProto.oneof_index should not be set for "
5467                "extensions.");
5468     }
5469   } else {
5470     if (proto.has_extendee()) {
5471       AddError(result->full_name(), proto,
5472                DescriptorPool::ErrorCollector::EXTENDEE,
5473                "FieldDescriptorProto.extendee set for non-extension field.");
5474     }
5475 
5476     result->containing_type_ = parent;
5477 
5478     if (proto.has_oneof_index()) {
5479       if (proto.oneof_index() < 0 ||
5480           proto.oneof_index() >= parent->oneof_decl_count()) {
5481         AddError(result->full_name(), proto,
5482                  DescriptorPool::ErrorCollector::TYPE,
5483                  strings::Substitute("FieldDescriptorProto.oneof_index $0 is "
5484                                   "out of range for type \"$1\".",
5485                                   proto.oneof_index(), parent->name()));
5486       } else {
5487         result->is_oneof_ = true;
5488         result->scope_.containing_oneof =
5489             parent->oneof_decl(proto.oneof_index());
5490       }
5491     }
5492   }
5493 
5494   // Copy options.
5495   result->options_ = nullptr;  // Set to default_instance later if necessary.
5496   if (proto.has_options()) {
5497     AllocateOptions(proto.options(), result,
5498                     FieldDescriptorProto::kOptionsFieldNumber,
5499                     "google.protobuf.FieldOptions");
5500   }
5501 
5502 
5503   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5504 }
5505 
BuildExtensionRange(const DescriptorProto::ExtensionRange & proto,const Descriptor * parent,Descriptor::ExtensionRange * result)5506 void DescriptorBuilder::BuildExtensionRange(
5507     const DescriptorProto::ExtensionRange& proto, const Descriptor* parent,
5508     Descriptor::ExtensionRange* result) {
5509   result->start = proto.start();
5510   result->end = proto.end();
5511   if (result->start <= 0) {
5512     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5513              "Extension numbers must be positive integers.");
5514   }
5515 
5516   // Checking of the upper bound of the extension range is deferred until after
5517   // options interpreting. This allows messages with message_set_wire_format to
5518   // have extensions beyond FieldDescriptor::kMaxNumber, since the extension
5519   // numbers are actually used as int32s in the message_set_wire_format.
5520 
5521   if (result->start >= result->end) {
5522     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5523              "Extension range end number must be greater than start number.");
5524   }
5525 
5526   result->options_ = nullptr;  // Set to default_instance later if necessary.
5527   if (proto.has_options()) {
5528     std::vector<int> options_path;
5529     parent->GetLocationPath(&options_path);
5530     options_path.push_back(DescriptorProto::kExtensionRangeFieldNumber);
5531     // find index of this extension range in order to compute path
5532     int index;
5533     for (index = 0; parent->extension_ranges_ + index != result; index++) {
5534     }
5535     options_path.push_back(index);
5536     options_path.push_back(DescriptorProto_ExtensionRange::kOptionsFieldNumber);
5537     AllocateOptionsImpl(parent->full_name(), parent->full_name(),
5538                         proto.options(), result, options_path,
5539                         "google.protobuf.ExtensionRangeOptions");
5540   }
5541 }
5542 
BuildReservedRange(const DescriptorProto::ReservedRange & proto,const Descriptor * parent,Descriptor::ReservedRange * result)5543 void DescriptorBuilder::BuildReservedRange(
5544     const DescriptorProto::ReservedRange& proto, const Descriptor* parent,
5545     Descriptor::ReservedRange* result) {
5546   result->start = proto.start();
5547   result->end = proto.end();
5548   if (result->start <= 0) {
5549     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5550              "Reserved numbers must be positive integers.");
5551   }
5552 }
5553 
BuildReservedRange(const EnumDescriptorProto::EnumReservedRange & proto,const EnumDescriptor * parent,EnumDescriptor::ReservedRange * result)5554 void DescriptorBuilder::BuildReservedRange(
5555     const EnumDescriptorProto::EnumReservedRange& proto,
5556     const EnumDescriptor* parent, EnumDescriptor::ReservedRange* result) {
5557   result->start = proto.start();
5558   result->end = proto.end();
5559 
5560   if (result->start > result->end) {
5561     AddError(parent->full_name(), proto, DescriptorPool::ErrorCollector::NUMBER,
5562              "Reserved range end number must be greater than start number.");
5563   }
5564 }
5565 
BuildOneof(const OneofDescriptorProto & proto,Descriptor * parent,OneofDescriptor * result)5566 void DescriptorBuilder::BuildOneof(const OneofDescriptorProto& proto,
5567                                    Descriptor* parent,
5568                                    OneofDescriptor* result) {
5569   result->all_names_ = AllocateNameStrings(parent->full_name(), proto.name());
5570   ValidateSymbolName(proto.name(), result->full_name(), proto);
5571 
5572   result->containing_type_ = parent;
5573 
5574   // We need to fill these in later.
5575   result->field_count_ = 0;
5576   result->fields_ = nullptr;
5577   result->options_ = nullptr;
5578 
5579   // Copy options.
5580   if (proto.has_options()) {
5581     AllocateOptions(proto.options(), result,
5582                     OneofDescriptorProto::kOptionsFieldNumber,
5583                     "google.protobuf.OneofOptions");
5584   }
5585 
5586   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5587 }
5588 
CheckEnumValueUniqueness(const EnumDescriptorProto & proto,const EnumDescriptor * result)5589 void DescriptorBuilder::CheckEnumValueUniqueness(
5590     const EnumDescriptorProto& proto, const EnumDescriptor* result) {
5591 
5592   // Check that enum labels are still unique when we remove the enum prefix from
5593   // values that have it.
5594   //
5595   // This will fail for something like:
5596   //
5597   //   enum MyEnum {
5598   //     MY_ENUM_FOO = 0;
5599   //     FOO = 1;
5600   //   }
5601   //
5602   // By enforcing this reasonable constraint, we allow code generators to strip
5603   // the prefix and/or PascalCase it without creating conflicts.  This can lead
5604   // to much nicer language-specific enums like:
5605   //
5606   //   enum NameType {
5607   //     FirstName = 1,
5608   //     LastName = 2,
5609   //   }
5610   //
5611   // Instead of:
5612   //
5613   //   enum NameType {
5614   //     NAME_TYPE_FIRST_NAME = 1,
5615   //     NAME_TYPE_LAST_NAME = 2,
5616   //   }
5617   PrefixRemover remover(result->name());
5618   std::map<std::string, const EnumValueDescriptor*> values;
5619   for (int i = 0; i < result->value_count(); i++) {
5620     const EnumValueDescriptor* value = result->value(i);
5621     std::string stripped =
5622         EnumValueToPascalCase(remover.MaybeRemove(value->name()));
5623     std::pair<std::map<std::string, const EnumValueDescriptor*>::iterator, bool>
5624         insert_result = values.insert(std::make_pair(stripped, value));
5625     bool inserted = insert_result.second;
5626 
5627     // We don't throw the error if the two conflicting symbols are identical, or
5628     // if they map to the same number.  In the former case, the normal symbol
5629     // duplication error will fire so we don't need to (and its error message
5630     // will make more sense). We allow the latter case so users can create
5631     // aliases which add or remove the prefix (code generators that do prefix
5632     // stripping should de-dup the labels in this case).
5633     if (!inserted && insert_result.first->second->name() != value->name() &&
5634         insert_result.first->second->number() != value->number()) {
5635       std::string error_message =
5636           "Enum name " + value->name() + " has the same name as " +
5637           values[stripped]->name() +
5638           " if you ignore case and strip out the enum name prefix (if any). "
5639           "This is error-prone and can lead to undefined behavior. "
5640           "Please avoid doing this. If you are using allow_alias, please "
5641           "assign the same numeric value to both enums.";
5642       // There are proto2 enums out there with conflicting names, so to preserve
5643       // compatibility we issue only a warning for proto2.
5644       if (result->file()->syntax() == FileDescriptor::SYNTAX_PROTO2) {
5645         AddWarning(value->full_name(), proto.value(i),
5646                    DescriptorPool::ErrorCollector::NAME, error_message);
5647       } else {
5648         AddError(value->full_name(), proto.value(i),
5649                  DescriptorPool::ErrorCollector::NAME, error_message);
5650       }
5651     }
5652   }
5653 }
5654 
BuildEnum(const EnumDescriptorProto & proto,const Descriptor * parent,EnumDescriptor * result)5655 void DescriptorBuilder::BuildEnum(const EnumDescriptorProto& proto,
5656                                   const Descriptor* parent,
5657                                   EnumDescriptor* result) {
5658   const std::string& scope =
5659       (parent == nullptr) ? file_->package() : parent->full_name();
5660 
5661   result->all_names_ = AllocateNameStrings(scope, proto.name());
5662   ValidateSymbolName(proto.name(), result->full_name(), proto);
5663   result->file_ = file_;
5664   result->containing_type_ = parent;
5665   result->is_placeholder_ = false;
5666   result->is_unqualified_placeholder_ = false;
5667 
5668   if (proto.value_size() == 0) {
5669     // We cannot allow enums with no values because this would mean there
5670     // would be no valid default value for fields of this type.
5671     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
5672              "Enums must contain at least one value.");
5673   }
5674 
5675   BUILD_ARRAY(proto, result, value, BuildEnumValue, result);
5676   BUILD_ARRAY(proto, result, reserved_range, BuildReservedRange, result);
5677 
5678   // Copy reserved names.
5679   int reserved_name_count = proto.reserved_name_size();
5680   result->reserved_name_count_ = reserved_name_count;
5681   result->reserved_names_ =
5682       tables_->AllocateArray<const std::string*>(reserved_name_count);
5683   for (int i = 0; i < reserved_name_count; ++i) {
5684     result->reserved_names_[i] =
5685         tables_->AllocateString(proto.reserved_name(i));
5686   }
5687 
5688   CheckEnumValueUniqueness(proto, result);
5689 
5690   // Copy options.
5691   result->options_ = nullptr;  // Set to default_instance later if necessary.
5692   if (proto.has_options()) {
5693     AllocateOptions(proto.options(), result,
5694                     EnumDescriptorProto::kOptionsFieldNumber,
5695                     "google.protobuf.EnumOptions");
5696   }
5697 
5698   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5699 
5700   for (int i = 0; i < proto.reserved_range_size(); i++) {
5701     const EnumDescriptorProto_EnumReservedRange& range1 =
5702         proto.reserved_range(i);
5703     for (int j = i + 1; j < proto.reserved_range_size(); j++) {
5704       const EnumDescriptorProto_EnumReservedRange& range2 =
5705           proto.reserved_range(j);
5706       if (range1.end() >= range2.start() && range2.end() >= range1.start()) {
5707         AddError(result->full_name(), proto.reserved_range(i),
5708                  DescriptorPool::ErrorCollector::NUMBER,
5709                  strings::Substitute("Reserved range $0 to $1 overlaps with "
5710                                   "already-defined range $2 to $3.",
5711                                   range2.start(), range2.end(), range1.start(),
5712                                   range1.end()));
5713       }
5714     }
5715   }
5716 
5717   HASH_SET<std::string> reserved_name_set;
5718   for (int i = 0; i < proto.reserved_name_size(); i++) {
5719     const std::string& name = proto.reserved_name(i);
5720     if (reserved_name_set.find(name) == reserved_name_set.end()) {
5721       reserved_name_set.insert(name);
5722     } else {
5723       AddError(name, proto, DescriptorPool::ErrorCollector::NAME,
5724                strings::Substitute("Enum value \"$0\" is reserved multiple times.",
5725                                 name));
5726     }
5727   }
5728 
5729   for (int i = 0; i < result->value_count(); i++) {
5730     const EnumValueDescriptor* value = result->value(i);
5731     for (int j = 0; j < result->reserved_range_count(); j++) {
5732       const EnumDescriptor::ReservedRange* range = result->reserved_range(j);
5733       if (range->start <= value->number() && value->number() <= range->end) {
5734         AddError(value->full_name(), proto.reserved_range(j),
5735                  DescriptorPool::ErrorCollector::NUMBER,
5736                  strings::Substitute("Enum value \"$0\" uses reserved number $1.",
5737                                   value->name(), value->number()));
5738       }
5739     }
5740     if (reserved_name_set.find(value->name()) != reserved_name_set.end()) {
5741       AddError(
5742           value->full_name(), proto.value(i),
5743           DescriptorPool::ErrorCollector::NAME,
5744           strings::Substitute("Enum value \"$0\" is reserved.", value->name()));
5745     }
5746   }
5747 }
5748 
BuildEnumValue(const EnumValueDescriptorProto & proto,const EnumDescriptor * parent,EnumValueDescriptor * result)5749 void DescriptorBuilder::BuildEnumValue(const EnumValueDescriptorProto& proto,
5750                                        const EnumDescriptor* parent,
5751                                        EnumValueDescriptor* result) {
5752   // Note:  full_name for enum values is a sibling to the parent's name, not a
5753   //   child of it.
5754   std::string full_name;
5755   size_t scope_len = parent->full_name().size() - parent->name().size();
5756   full_name.reserve(scope_len + proto.name().size());
5757   full_name.append(parent->full_name().data(), scope_len);
5758   full_name.append(proto.name());
5759 
5760   result->all_names_ =
5761       tables_->AllocateStringArray(proto.name(), std::move(full_name));
5762   result->number_ = proto.number();
5763   result->type_ = parent;
5764 
5765   ValidateSymbolName(proto.name(), result->full_name(), proto);
5766 
5767   // Copy options.
5768   result->options_ = nullptr;  // Set to default_instance later if necessary.
5769   if (proto.has_options()) {
5770     AllocateOptions(proto.options(), result,
5771                     EnumValueDescriptorProto::kOptionsFieldNumber,
5772                     "google.protobuf.EnumValueOptions");
5773   }
5774 
5775   // Again, enum values are weird because we makes them appear as siblings
5776   // of the enum type instead of children of it.  So, we use
5777   // parent->containing_type() as the value's parent.
5778   bool added_to_outer_scope =
5779       AddSymbol(result->full_name(), parent->containing_type(), result->name(),
5780                 proto, Symbol::EnumValue(result, 0));
5781 
5782   // However, we also want to be able to search for values within a single
5783   // enum type, so we add it as a child of the enum type itself, too.
5784   // Note:  This could fail, but if it does, the error has already been
5785   //   reported by the above AddSymbol() call, so we ignore the return code.
5786   bool added_to_inner_scope = file_tables_->AddAliasUnderParent(
5787       parent, result->name(), Symbol::EnumValue(result, 1));
5788 
5789   if (added_to_inner_scope && !added_to_outer_scope) {
5790     // This value did not conflict with any values defined in the same enum,
5791     // but it did conflict with some other symbol defined in the enum type's
5792     // scope.  Let's print an additional error to explain this.
5793     std::string outer_scope;
5794     if (parent->containing_type() == nullptr) {
5795       outer_scope = file_->package();
5796     } else {
5797       outer_scope = parent->containing_type()->full_name();
5798     }
5799 
5800     if (outer_scope.empty()) {
5801       outer_scope = "the global scope";
5802     } else {
5803       outer_scope = "\"" + outer_scope + "\"";
5804     }
5805 
5806     AddError(result->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
5807              "Note that enum values use C++ scoping rules, meaning that "
5808              "enum values are siblings of their type, not children of it.  "
5809              "Therefore, \"" +
5810                  result->name() + "\" must be unique within " + outer_scope +
5811                  ", not just within \"" + parent->name() + "\".");
5812   }
5813 
5814   // An enum is allowed to define two numbers that refer to the same value.
5815   // FindValueByNumber() should return the first such value, so we simply
5816   // ignore AddEnumValueByNumber()'s return code.
5817   file_tables_->AddEnumValueByNumber(result);
5818 }
5819 
BuildService(const ServiceDescriptorProto & proto,const void *,ServiceDescriptor * result)5820 void DescriptorBuilder::BuildService(const ServiceDescriptorProto& proto,
5821                                      const void* /* dummy */,
5822                                      ServiceDescriptor* result) {
5823   result->all_names_ = AllocateNameStrings(file_->package(), proto.name());
5824   result->file_ = file_;
5825   ValidateSymbolName(proto.name(), result->full_name(), proto);
5826 
5827   BUILD_ARRAY(proto, result, method, BuildMethod, result);
5828 
5829   // Copy options.
5830   result->options_ = nullptr;  // Set to default_instance later if necessary.
5831   if (proto.has_options()) {
5832     AllocateOptions(proto.options(), result,
5833                     ServiceDescriptorProto::kOptionsFieldNumber,
5834                     "google.protobuf.ServiceOptions");
5835   }
5836 
5837   AddSymbol(result->full_name(), nullptr, result->name(), proto,
5838             Symbol(result));
5839 }
5840 
BuildMethod(const MethodDescriptorProto & proto,const ServiceDescriptor * parent,MethodDescriptor * result)5841 void DescriptorBuilder::BuildMethod(const MethodDescriptorProto& proto,
5842                                     const ServiceDescriptor* parent,
5843                                     MethodDescriptor* result) {
5844   result->service_ = parent;
5845   result->all_names_ = AllocateNameStrings(parent->full_name(), proto.name());
5846 
5847   ValidateSymbolName(proto.name(), result->full_name(), proto);
5848 
5849   // These will be filled in when cross-linking.
5850   result->input_type_.Init();
5851   result->output_type_.Init();
5852 
5853   // Copy options.
5854   result->options_ = nullptr;  // Set to default_instance later if necessary.
5855   if (proto.has_options()) {
5856     AllocateOptions(proto.options(), result,
5857                     MethodDescriptorProto::kOptionsFieldNumber,
5858                     "google.protobuf.MethodOptions");
5859   }
5860 
5861   result->client_streaming_ = proto.client_streaming();
5862   result->server_streaming_ = proto.server_streaming();
5863 
5864   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
5865 }
5866 
5867 #undef BUILD_ARRAY
5868 
5869 // -------------------------------------------------------------------
5870 
CrossLinkFile(FileDescriptor * file,const FileDescriptorProto & proto)5871 void DescriptorBuilder::CrossLinkFile(FileDescriptor* file,
5872                                       const FileDescriptorProto& proto) {
5873   if (file->options_ == nullptr) {
5874     file->options_ = &FileOptions::default_instance();
5875   }
5876 
5877   for (int i = 0; i < file->message_type_count(); i++) {
5878     CrossLinkMessage(&file->message_types_[i], proto.message_type(i));
5879   }
5880 
5881   for (int i = 0; i < file->extension_count(); i++) {
5882     CrossLinkField(&file->extensions_[i], proto.extension(i));
5883   }
5884 
5885   for (int i = 0; i < file->enum_type_count(); i++) {
5886     CrossLinkEnum(&file->enum_types_[i], proto.enum_type(i));
5887   }
5888 
5889   for (int i = 0; i < file->service_count(); i++) {
5890     CrossLinkService(&file->services_[i], proto.service(i));
5891   }
5892 }
5893 
CrossLinkMessage(Descriptor * message,const DescriptorProto & proto)5894 void DescriptorBuilder::CrossLinkMessage(Descriptor* message,
5895                                          const DescriptorProto& proto) {
5896   if (message->options_ == nullptr) {
5897     message->options_ = &MessageOptions::default_instance();
5898   }
5899 
5900   for (int i = 0; i < message->nested_type_count(); i++) {
5901     CrossLinkMessage(&message->nested_types_[i], proto.nested_type(i));
5902   }
5903 
5904   for (int i = 0; i < message->enum_type_count(); i++) {
5905     CrossLinkEnum(&message->enum_types_[i], proto.enum_type(i));
5906   }
5907 
5908   for (int i = 0; i < message->field_count(); i++) {
5909     CrossLinkField(&message->fields_[i], proto.field(i));
5910   }
5911 
5912   for (int i = 0; i < message->extension_count(); i++) {
5913     CrossLinkField(&message->extensions_[i], proto.extension(i));
5914   }
5915 
5916   for (int i = 0; i < message->extension_range_count(); i++) {
5917     CrossLinkExtensionRange(&message->extension_ranges_[i],
5918                             proto.extension_range(i));
5919   }
5920 
5921   // Set up field array for each oneof.
5922 
5923   // First count the number of fields per oneof.
5924   for (int i = 0; i < message->field_count(); i++) {
5925     const OneofDescriptor* oneof_decl = message->field(i)->containing_oneof();
5926     if (oneof_decl != nullptr) {
5927       // Make sure fields belonging to the same oneof are defined consecutively.
5928       // This enables optimizations in codegens and reflection libraries to
5929       // skip fields in the oneof group, as only one of the field can be set.
5930       // Note that field_count() returns how many fields in this oneof we have
5931       // seen so far. field_count() > 0 guarantees that i > 0, so field(i-1) is
5932       // safe.
5933       if (oneof_decl->field_count() > 0 &&
5934           message->field(i - 1)->containing_oneof() != oneof_decl) {
5935         AddError(message->full_name() + "." + message->field(i - 1)->name(),
5936                  proto.field(i - 1), DescriptorPool::ErrorCollector::TYPE,
5937                  strings::Substitute(
5938                      "Fields in the same oneof must be defined consecutively. "
5939                      "\"$0\" cannot be defined before the completion of the "
5940                      "\"$1\" oneof definition.",
5941                      message->field(i - 1)->name(), oneof_decl->name()));
5942       }
5943       // Must go through oneof_decls_ array to get a non-const version of the
5944       // OneofDescriptor.
5945       ++message->oneof_decls_[oneof_decl->index()].field_count_;
5946     }
5947   }
5948 
5949   // Then allocate the arrays.
5950   for (int i = 0; i < message->oneof_decl_count(); i++) {
5951     OneofDescriptor* oneof_decl = &message->oneof_decls_[i];
5952 
5953     if (oneof_decl->field_count() == 0) {
5954       AddError(message->full_name() + "." + oneof_decl->name(),
5955                proto.oneof_decl(i), DescriptorPool::ErrorCollector::NAME,
5956                "Oneof must have at least one field.");
5957     }
5958 
5959     oneof_decl->fields_ = tables_->AllocateArray<const FieldDescriptor*>(
5960         oneof_decl->field_count_);
5961     oneof_decl->field_count_ = 0;
5962 
5963     if (oneof_decl->options_ == nullptr) {
5964       oneof_decl->options_ = &OneofOptions::default_instance();
5965     }
5966   }
5967 
5968   // Then fill them in.
5969   for (int i = 0; i < message->field_count(); i++) {
5970     const OneofDescriptor* oneof_decl = message->field(i)->containing_oneof();
5971     if (oneof_decl != nullptr) {
5972       OneofDescriptor* mutable_oneof_decl =
5973           &message->oneof_decls_[oneof_decl->index()];
5974       message->fields_[i].index_in_oneof_ = mutable_oneof_decl->field_count_;
5975       mutable_oneof_decl->fields_[mutable_oneof_decl->field_count_++] =
5976           message->field(i);
5977     }
5978   }
5979 
5980   for (int i = 0; i < message->field_count(); i++) {
5981     const FieldDescriptor* field = message->field(i);
5982     if (field->proto3_optional_) {
5983       if (!field->containing_oneof() ||
5984           !field->containing_oneof()->is_synthetic()) {
5985         AddError(message->full_name(), proto.field(i),
5986                  DescriptorPool::ErrorCollector::OTHER,
5987                  "Fields with proto3_optional set must be "
5988                  "a member of a one-field oneof");
5989       }
5990     }
5991   }
5992 
5993   // Synthetic oneofs must be last.
5994   int first_synthetic = -1;
5995   for (int i = 0; i < message->oneof_decl_count(); i++) {
5996     const OneofDescriptor* oneof = message->oneof_decl(i);
5997     if (oneof->is_synthetic()) {
5998       if (first_synthetic == -1) {
5999         first_synthetic = i;
6000       }
6001     } else {
6002       if (first_synthetic != -1) {
6003         AddError(message->full_name(), proto.oneof_decl(i),
6004                  DescriptorPool::ErrorCollector::OTHER,
6005                  "Synthetic oneofs must be after all other oneofs");
6006       }
6007     }
6008   }
6009 
6010   if (first_synthetic == -1) {
6011     message->real_oneof_decl_count_ = message->oneof_decl_count_;
6012   } else {
6013     message->real_oneof_decl_count_ = first_synthetic;
6014   }
6015 }
6016 
CrossLinkExtensionRange(Descriptor::ExtensionRange * range,const DescriptorProto::ExtensionRange & proto)6017 void DescriptorBuilder::CrossLinkExtensionRange(
6018     Descriptor::ExtensionRange* range,
6019     const DescriptorProto::ExtensionRange& proto) {
6020   if (range->options_ == nullptr) {
6021     range->options_ = &ExtensionRangeOptions::default_instance();
6022   }
6023 }
6024 
CrossLinkField(FieldDescriptor * field,const FieldDescriptorProto & proto)6025 void DescriptorBuilder::CrossLinkField(FieldDescriptor* field,
6026                                        const FieldDescriptorProto& proto) {
6027   if (field->options_ == nullptr) {
6028     field->options_ = &FieldOptions::default_instance();
6029   }
6030 
6031   // Add the field to the lowercase-name and camelcase-name tables.
6032   file_tables_->AddFieldByStylizedNames(field);
6033 
6034   if (proto.has_extendee()) {
6035     Symbol extendee =
6036         LookupSymbol(proto.extendee(), field->full_name(),
6037                      DescriptorPool::PLACEHOLDER_EXTENDABLE_MESSAGE);
6038     if (extendee.IsNull()) {
6039       AddNotDefinedError(field->full_name(), proto,
6040                          DescriptorPool::ErrorCollector::EXTENDEE,
6041                          proto.extendee());
6042       return;
6043     } else if (extendee.type() != Symbol::MESSAGE) {
6044       AddError(field->full_name(), proto,
6045                DescriptorPool::ErrorCollector::EXTENDEE,
6046                "\"" + proto.extendee() + "\" is not a message type.");
6047       return;
6048     }
6049     field->containing_type_ = extendee.descriptor();
6050 
6051     const Descriptor::ExtensionRange* extension_range =
6052         field->containing_type()->FindExtensionRangeContainingNumber(
6053             field->number());
6054 
6055     if (extension_range == nullptr) {
6056       // Set of valid extension numbers for MessageSet is different (< 2^32)
6057       // from other extendees (< 2^29). If unknown deps are allowed, we may not
6058       // have that information, and wrongly deem the extension as invalid.
6059       auto skip_check = get_allow_unknown(pool_) &&
6060                         proto.extendee() == "google.protobuf.bridge.MessageSet";
6061       if (!skip_check) {
6062         AddError(field->full_name(), proto,
6063                  DescriptorPool::ErrorCollector::NUMBER,
6064                  strings::Substitute("\"$0\" does not declare $1 as an "
6065                                   "extension number.",
6066                                   field->containing_type()->full_name(),
6067                                   field->number()));
6068       }
6069     }
6070   }
6071 
6072   if (field->containing_oneof() != nullptr) {
6073     if (field->label() != FieldDescriptor::LABEL_OPTIONAL) {
6074       // Note that this error will never happen when parsing .proto files.
6075       // It can only happen if you manually construct a FileDescriptorProto
6076       // that is incorrect.
6077       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6078                "Fields of oneofs must themselves have label LABEL_OPTIONAL.");
6079     }
6080   }
6081 
6082   if (proto.has_type_name()) {
6083     // Assume we are expecting a message type unless the proto contains some
6084     // evidence that it expects an enum type.  This only makes a difference if
6085     // we end up creating a placeholder.
6086     bool expecting_enum = (proto.type() == FieldDescriptorProto::TYPE_ENUM) ||
6087                           proto.has_default_value();
6088 
6089     // In case of weak fields we force building the dependency. We need to know
6090     // if the type exist or not. If it doesn't exist we substitute Empty which
6091     // should only be done if the type can't be found in the generated pool.
6092     // TODO(gerbens) Ideally we should query the database directly to check
6093     // if weak fields exist or not so that we don't need to force building
6094     // weak dependencies. However the name lookup rules for symbols are
6095     // somewhat complicated, so I defer it too another CL.
6096     bool is_weak = !pool_->enforce_weak_ && proto.options().weak();
6097     bool is_lazy = pool_->lazily_build_dependencies_ && !is_weak;
6098 
6099     Symbol type =
6100         LookupSymbol(proto.type_name(), field->full_name(),
6101                      expecting_enum ? DescriptorPool::PLACEHOLDER_ENUM
6102                                     : DescriptorPool::PLACEHOLDER_MESSAGE,
6103                      LOOKUP_TYPES, !is_lazy);
6104 
6105     if (type.IsNull()) {
6106       if (is_lazy) {
6107         // Save the symbol names for later for lookup, and allocate the once
6108         // object needed for the accessors.
6109         std::string name = proto.type_name();
6110         field->type_once_ = tables_->AllocateLazyInit();
6111         field->type_once_->field.type_name = tables_->AllocateString(name);
6112         if (proto.has_default_value()) {
6113           field->type_once_->field.default_value_enum_name =
6114               tables_->AllocateString(proto.default_value());
6115         }
6116         // AddFieldByNumber and AddExtension are done later in this function,
6117         // and can/must be done if the field type was not found. The related
6118         // error checking is not necessary when in lazily_build_dependencies_
6119         // mode, and can't be done without building the type's descriptor,
6120         // which we don't want to do.
6121         file_tables_->AddFieldByNumber(field);
6122         if (field->is_extension()) {
6123           tables_->AddExtension(field);
6124         }
6125         return;
6126       } else {
6127         // If the type is a weak type, we change the type to a google.protobuf.Empty
6128         // field.
6129         if (is_weak) {
6130           type = FindSymbol(kNonLinkedWeakMessageReplacementName);
6131         }
6132         if (type.IsNull()) {
6133           AddNotDefinedError(field->full_name(), proto,
6134                              DescriptorPool::ErrorCollector::TYPE,
6135                              proto.type_name());
6136           return;
6137         }
6138       }
6139     }
6140 
6141     if (!proto.has_type()) {
6142       // Choose field type based on symbol.
6143       if (type.type() == Symbol::MESSAGE) {
6144         field->type_ = FieldDescriptor::TYPE_MESSAGE;
6145       } else if (type.type() == Symbol::ENUM) {
6146         field->type_ = FieldDescriptor::TYPE_ENUM;
6147       } else {
6148         AddError(field->full_name(), proto,
6149                  DescriptorPool::ErrorCollector::TYPE,
6150                  "\"" + proto.type_name() + "\" is not a type.");
6151         return;
6152       }
6153     }
6154 
6155     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
6156       field->type_descriptor_.message_type = type.descriptor();
6157       if (field->type_descriptor_.message_type == nullptr) {
6158         AddError(field->full_name(), proto,
6159                  DescriptorPool::ErrorCollector::TYPE,
6160                  "\"" + proto.type_name() + "\" is not a message type.");
6161         return;
6162       }
6163 
6164       if (field->has_default_value()) {
6165         AddError(field->full_name(), proto,
6166                  DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6167                  "Messages can't have default values.");
6168       }
6169     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
6170       field->type_descriptor_.enum_type = type.enum_descriptor();
6171       if (field->type_descriptor_.enum_type == nullptr) {
6172         AddError(field->full_name(), proto,
6173                  DescriptorPool::ErrorCollector::TYPE,
6174                  "\"" + proto.type_name() + "\" is not an enum type.");
6175         return;
6176       }
6177 
6178       if (field->enum_type()->is_placeholder_) {
6179         // We can't look up default values for placeholder types.  We'll have
6180         // to just drop them.
6181         field->has_default_value_ = false;
6182       }
6183 
6184       if (field->has_default_value()) {
6185         // Ensure that the default value is an identifier. Parser cannot always
6186         // verify this because it does not have complete type information.
6187         // N.B. that this check yields better error messages but is not
6188         // necessary for correctness (an enum symbol must be a valid identifier
6189         // anyway), only for better errors.
6190         if (!io::Tokenizer::IsIdentifier(proto.default_value())) {
6191           AddError(field->full_name(), proto,
6192                    DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6193                    "Default value for an enum field must be an identifier.");
6194         } else {
6195           // We can't just use field->enum_type()->FindValueByName() here
6196           // because that locks the pool's mutex, which we have already locked
6197           // at this point.
6198           const EnumValueDescriptor* default_value =
6199               LookupSymbolNoPlaceholder(proto.default_value(),
6200                                         field->enum_type()->full_name())
6201                   .enum_value_descriptor();
6202 
6203           if (default_value != nullptr &&
6204               default_value->type() == field->enum_type()) {
6205             field->default_value_enum_ = default_value;
6206           } else {
6207             AddError(field->full_name(), proto,
6208                      DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6209                      "Enum type \"" + field->enum_type()->full_name() +
6210                          "\" has no value named \"" + proto.default_value() +
6211                          "\".");
6212           }
6213         }
6214       } else if (field->enum_type()->value_count() > 0) {
6215         // All enums must have at least one value, or we would have reported
6216         // an error elsewhere.  We use the first defined value as the default
6217         // if a default is not explicitly defined.
6218         field->default_value_enum_ = field->enum_type()->value(0);
6219       }
6220     } else {
6221       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6222                "Field with primitive type has type_name.");
6223     }
6224   } else {
6225     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE ||
6226         field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
6227       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6228                "Field with message or enum type missing type_name.");
6229     }
6230   }
6231 
6232   // Add the field to the fields-by-number table.
6233   // Note:  We have to do this *after* cross-linking because extensions do not
6234   // know their containing type until now. If we're in
6235   // lazily_build_dependencies_ mode, we're guaranteed there's no errors, so no
6236   // risk to calling containing_type() or other accessors that will build
6237   // dependencies.
6238   if (!file_tables_->AddFieldByNumber(field)) {
6239     const FieldDescriptor* conflicting_field = file_tables_->FindFieldByNumber(
6240         field->containing_type(), field->number());
6241     std::string containing_type_name =
6242         field->containing_type() == nullptr
6243             ? "unknown"
6244             : field->containing_type()->full_name();
6245     if (field->is_extension()) {
6246       AddError(field->full_name(), proto,
6247                DescriptorPool::ErrorCollector::NUMBER,
6248                strings::Substitute("Extension number $0 has already been used "
6249                                 "in \"$1\" by extension \"$2\".",
6250                                 field->number(), containing_type_name,
6251                                 conflicting_field->full_name()));
6252     } else {
6253       AddError(field->full_name(), proto,
6254                DescriptorPool::ErrorCollector::NUMBER,
6255                strings::Substitute("Field number $0 has already been used in "
6256                                 "\"$1\" by field \"$2\".",
6257                                 field->number(), containing_type_name,
6258                                 conflicting_field->name()));
6259     }
6260   } else {
6261     if (field->is_extension()) {
6262       if (!tables_->AddExtension(field)) {
6263         const FieldDescriptor* conflicting_field =
6264             tables_->FindExtension(field->containing_type(), field->number());
6265         std::string containing_type_name =
6266             field->containing_type() == nullptr
6267                 ? "unknown"
6268                 : field->containing_type()->full_name();
6269         std::string error_msg = strings::Substitute(
6270             "Extension number $0 has already been used in \"$1\" by extension "
6271             "\"$2\" defined in $3.",
6272             field->number(), containing_type_name,
6273             conflicting_field->full_name(), conflicting_field->file()->name());
6274         // Conflicting extension numbers should be an error. However, before
6275         // turning this into an error we need to fix all existing broken
6276         // protos first.
6277         // TODO(xiaofeng): Change this to an error.
6278         AddWarning(field->full_name(), proto,
6279                    DescriptorPool::ErrorCollector::NUMBER, error_msg);
6280       }
6281     }
6282   }
6283 }
6284 
CrossLinkEnum(EnumDescriptor * enum_type,const EnumDescriptorProto & proto)6285 void DescriptorBuilder::CrossLinkEnum(EnumDescriptor* enum_type,
6286                                       const EnumDescriptorProto& proto) {
6287   if (enum_type->options_ == nullptr) {
6288     enum_type->options_ = &EnumOptions::default_instance();
6289   }
6290 
6291   for (int i = 0; i < enum_type->value_count(); i++) {
6292     CrossLinkEnumValue(&enum_type->values_[i], proto.value(i));
6293   }
6294 }
6295 
CrossLinkEnumValue(EnumValueDescriptor * enum_value,const EnumValueDescriptorProto &)6296 void DescriptorBuilder::CrossLinkEnumValue(
6297     EnumValueDescriptor* enum_value,
6298     const EnumValueDescriptorProto& /* proto */) {
6299   if (enum_value->options_ == nullptr) {
6300     enum_value->options_ = &EnumValueOptions::default_instance();
6301   }
6302 }
6303 
CrossLinkService(ServiceDescriptor * service,const ServiceDescriptorProto & proto)6304 void DescriptorBuilder::CrossLinkService(ServiceDescriptor* service,
6305                                          const ServiceDescriptorProto& proto) {
6306   if (service->options_ == nullptr) {
6307     service->options_ = &ServiceOptions::default_instance();
6308   }
6309 
6310   for (int i = 0; i < service->method_count(); i++) {
6311     CrossLinkMethod(&service->methods_[i], proto.method(i));
6312   }
6313 }
6314 
CrossLinkMethod(MethodDescriptor * method,const MethodDescriptorProto & proto)6315 void DescriptorBuilder::CrossLinkMethod(MethodDescriptor* method,
6316                                         const MethodDescriptorProto& proto) {
6317   if (method->options_ == nullptr) {
6318     method->options_ = &MethodOptions::default_instance();
6319   }
6320 
6321   Symbol input_type =
6322       LookupSymbol(proto.input_type(), method->full_name(),
6323                    DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
6324                    !pool_->lazily_build_dependencies_);
6325   if (input_type.IsNull()) {
6326     if (!pool_->lazily_build_dependencies_) {
6327       AddNotDefinedError(method->full_name(), proto,
6328                          DescriptorPool::ErrorCollector::INPUT_TYPE,
6329                          proto.input_type());
6330     } else {
6331       method->input_type_.SetLazy(proto.input_type(), file_);
6332     }
6333   } else if (input_type.type() != Symbol::MESSAGE) {
6334     AddError(method->full_name(), proto,
6335              DescriptorPool::ErrorCollector::INPUT_TYPE,
6336              "\"" + proto.input_type() + "\" is not a message type.");
6337   } else {
6338     method->input_type_.Set(input_type.descriptor());
6339   }
6340 
6341   Symbol output_type =
6342       LookupSymbol(proto.output_type(), method->full_name(),
6343                    DescriptorPool::PLACEHOLDER_MESSAGE, LOOKUP_ALL,
6344                    !pool_->lazily_build_dependencies_);
6345   if (output_type.IsNull()) {
6346     if (!pool_->lazily_build_dependencies_) {
6347       AddNotDefinedError(method->full_name(), proto,
6348                          DescriptorPool::ErrorCollector::OUTPUT_TYPE,
6349                          proto.output_type());
6350     } else {
6351       method->output_type_.SetLazy(proto.output_type(), file_);
6352     }
6353   } else if (output_type.type() != Symbol::MESSAGE) {
6354     AddError(method->full_name(), proto,
6355              DescriptorPool::ErrorCollector::OUTPUT_TYPE,
6356              "\"" + proto.output_type() + "\" is not a message type.");
6357   } else {
6358     method->output_type_.Set(output_type.descriptor());
6359   }
6360 }
6361 
6362 // -------------------------------------------------------------------
6363 
// Calls Validate<type>Options() on every element of one of `descriptor`'s
// child arrays, pairing element i with `proto.array_name(i)`.
//
//   descriptor  - pointer to the parent descriptor.
//   array_name  - singular child name; the macro uses the accessor
//                 `array_name_count()` and the member `array_names_`.
//   type        - infix selecting the Validate<type>Options function.
//
// The expansion refers to a variable named `proto` that must be in scope at
// the call site.  It is wrapped in do/while(false) so that it behaves as a
// single statement (e.g. inside an unbraced if/else); terminate each use
// with a semicolon.
#define VALIDATE_OPTIONS_FROM_ARRAY(descriptor, array_name, type)    \
  do {                                                               \
    for (int i = 0; i < (descriptor)->array_name##_count(); ++i) {   \
      Validate##type##Options((descriptor)->array_name##s_ + i,      \
                              proto.array_name(i));                  \
    }                                                                \
  } while (false)
6369 
6370 // Determine if the file uses optimize_for = LITE_RUNTIME, being careful to
6371 // avoid problems that exist at init time.
IsLite(const FileDescriptor * file)6372 static bool IsLite(const FileDescriptor* file) {
6373   // TODO(kenton):  I don't even remember how many of these conditions are
6374   //   actually possible.  I'm just being super-safe.
6375   return file != nullptr &&
6376          &file->options() != &FileOptions::default_instance() &&
6377          file->options().optimize_for() == FileOptions::LITE_RUNTIME;
6378 }
6379 
ValidateFileOptions(FileDescriptor * file,const FileDescriptorProto & proto)6380 void DescriptorBuilder::ValidateFileOptions(FileDescriptor* file,
6381                                             const FileDescriptorProto& proto) {
6382   VALIDATE_OPTIONS_FROM_ARRAY(file, message_type, Message);
6383   VALIDATE_OPTIONS_FROM_ARRAY(file, enum_type, Enum);
6384   VALIDATE_OPTIONS_FROM_ARRAY(file, service, Service);
6385   VALIDATE_OPTIONS_FROM_ARRAY(file, extension, Field);
6386 
6387   // Lite files can only be imported by other Lite files.
6388   if (!IsLite(file)) {
6389     for (int i = 0; i < file->dependency_count(); i++) {
6390       if (IsLite(file->dependency(i))) {
6391         AddError(
6392             file->dependency(i)->name(), proto,
6393             DescriptorPool::ErrorCollector::IMPORT,
6394             "Files that do not use optimize_for = LITE_RUNTIME cannot import "
6395             "files which do use this option.  This file is not lite, but it "
6396             "imports \"" +
6397                 file->dependency(i)->name() + "\" which is.");
6398         break;
6399       }
6400     }
6401   }
6402   if (file->syntax() == FileDescriptor::SYNTAX_PROTO3) {
6403     ValidateProto3(file, proto);
6404   }
6405 }
6406 
ValidateProto3(FileDescriptor * file,const FileDescriptorProto & proto)6407 void DescriptorBuilder::ValidateProto3(FileDescriptor* file,
6408                                        const FileDescriptorProto& proto) {
6409   for (int i = 0; i < file->extension_count(); ++i) {
6410     ValidateProto3Field(file->extensions_ + i, proto.extension(i));
6411   }
6412   for (int i = 0; i < file->message_type_count(); ++i) {
6413     ValidateProto3Message(file->message_types_ + i, proto.message_type(i));
6414   }
6415   for (int i = 0; i < file->enum_type_count(); ++i) {
6416     ValidateProto3Enum(file->enum_types_ + i, proto.enum_type(i));
6417   }
6418 }
6419 
// Returns a copy of `name` with every '_' removed and every ASCII letter
// 'A'..'Z' mapped to its lowercase counterpart.  All other characters are
// copied through unchanged.
static std::string ToLowercaseWithoutUnderscores(const std::string& name) {
  std::string lowered;
  for (const char c : name) {
    if (c == '_') {
      continue;
    }
    const bool is_ascii_upper = c >= 'A' && c <= 'Z';
    lowered.push_back(is_ascii_upper ? static_cast<char>(c - 'A' + 'a') : c);
  }
  return lowered;
}
6433 
ValidateProto3Message(Descriptor * message,const DescriptorProto & proto)6434 void DescriptorBuilder::ValidateProto3Message(Descriptor* message,
6435                                               const DescriptorProto& proto) {
6436   for (int i = 0; i < message->nested_type_count(); ++i) {
6437     ValidateProto3Message(message->nested_types_ + i, proto.nested_type(i));
6438   }
6439   for (int i = 0; i < message->enum_type_count(); ++i) {
6440     ValidateProto3Enum(message->enum_types_ + i, proto.enum_type(i));
6441   }
6442   for (int i = 0; i < message->field_count(); ++i) {
6443     ValidateProto3Field(message->fields_ + i, proto.field(i));
6444   }
6445   for (int i = 0; i < message->extension_count(); ++i) {
6446     ValidateProto3Field(message->extensions_ + i, proto.extension(i));
6447   }
6448   if (message->extension_range_count() > 0) {
6449     AddError(message->full_name(), proto.extension_range(0),
6450              DescriptorPool::ErrorCollector::NUMBER,
6451              "Extension ranges are not allowed in proto3.");
6452   }
6453   if (message->options().message_set_wire_format()) {
6454     // Using MessageSet doesn't make sense since we disallow extensions.
6455     AddError(message->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6456              "MessageSet is not supported in proto3.");
6457   }
6458 
6459   // In proto3, we reject field names if they conflict in camelCase.
6460   // Note that we currently enforce a stricter rule: Field names must be
6461   // unique after being converted to lowercase with underscores removed.
6462   std::map<std::string, const FieldDescriptor*> name_to_field;
6463   for (int i = 0; i < message->field_count(); ++i) {
6464     std::string lowercase_name =
6465         ToLowercaseWithoutUnderscores(message->field(i)->name());
6466     if (name_to_field.find(lowercase_name) != name_to_field.end()) {
6467       AddError(message->full_name(), proto.field(i),
6468                DescriptorPool::ErrorCollector::NAME,
6469                "The JSON camel-case name of field \"" +
6470                    message->field(i)->name() + "\" conflicts with field \"" +
6471                    name_to_field[lowercase_name]->name() + "\". This is not " +
6472                    "allowed in proto3.");
6473     } else {
6474       name_to_field[lowercase_name] = message->field(i);
6475     }
6476   }
6477 }
6478 
ValidateProto3Field(FieldDescriptor * field,const FieldDescriptorProto & proto)6479 void DescriptorBuilder::ValidateProto3Field(FieldDescriptor* field,
6480                                             const FieldDescriptorProto& proto) {
6481   if (field->is_extension() &&
6482       !AllowedExtendeeInProto3(field->containing_type()->full_name())) {
6483     AddError(field->full_name(), proto,
6484              DescriptorPool::ErrorCollector::EXTENDEE,
6485              "Extensions in proto3 are only allowed for defining options.");
6486   }
6487   if (field->is_required()) {
6488     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6489              "Required fields are not allowed in proto3.");
6490   }
6491   if (field->has_default_value()) {
6492     AddError(field->full_name(), proto,
6493              DescriptorPool::ErrorCollector::DEFAULT_VALUE,
6494              "Explicit default values are not allowed in proto3.");
6495   }
6496   if (field->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
6497       field->enum_type() &&
6498       field->enum_type()->file()->syntax() != FileDescriptor::SYNTAX_PROTO3 &&
6499       field->enum_type()->file()->syntax() != FileDescriptor::SYNTAX_UNKNOWN) {
6500     // Proto3 messages can only use Proto3 enum types; otherwise we can't
6501     // guarantee that the default value is zero.
6502     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6503              "Enum type \"" + field->enum_type()->full_name() +
6504                  "\" is not a proto3 enum, but is used in \"" +
6505                  field->containing_type()->full_name() +
6506                  "\" which is a proto3 message type.");
6507   }
6508   if (field->type() == FieldDescriptor::TYPE_GROUP) {
6509     AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6510              "Groups are not supported in proto3 syntax.");
6511   }
6512 }
6513 
ValidateProto3Enum(EnumDescriptor * enm,const EnumDescriptorProto & proto)6514 void DescriptorBuilder::ValidateProto3Enum(EnumDescriptor* enm,
6515                                            const EnumDescriptorProto& proto) {
6516   if (enm->value_count() > 0 && enm->value(0)->number() != 0) {
6517     AddError(enm->full_name(), proto.value(0),
6518              DescriptorPool::ErrorCollector::NUMBER,
6519              "The first enum value must be zero in proto3.");
6520   }
6521 }
6522 
ValidateMessageOptions(Descriptor * message,const DescriptorProto & proto)6523 void DescriptorBuilder::ValidateMessageOptions(Descriptor* message,
6524                                                const DescriptorProto& proto) {
6525   VALIDATE_OPTIONS_FROM_ARRAY(message, field, Field);
6526   VALIDATE_OPTIONS_FROM_ARRAY(message, nested_type, Message);
6527   VALIDATE_OPTIONS_FROM_ARRAY(message, enum_type, Enum);
6528   VALIDATE_OPTIONS_FROM_ARRAY(message, extension, Field);
6529 
6530   const int64_t max_extension_range =
6531       static_cast<int64_t>(message->options().message_set_wire_format()
6532                                ? kint32max
6533                                : FieldDescriptor::kMaxNumber);
6534   for (int i = 0; i < message->extension_range_count(); ++i) {
6535     if (message->extension_range(i)->end > max_extension_range + 1) {
6536       AddError(message->full_name(), proto.extension_range(i),
6537                DescriptorPool::ErrorCollector::NUMBER,
6538                strings::Substitute("Extension numbers cannot be greater than $0.",
6539                                 max_extension_range));
6540     }
6541 
6542     ValidateExtensionRangeOptions(message->full_name(),
6543                                   message->extension_ranges_ + i,
6544                                   proto.extension_range(i));
6545   }
6546 }
6547 
6548 
ValidateFieldOptions(FieldDescriptor * field,const FieldDescriptorProto & proto)6549 void DescriptorBuilder::ValidateFieldOptions(
6550     FieldDescriptor* field, const FieldDescriptorProto& proto) {
6551   if (pool_->lazily_build_dependencies_ && (!field || !field->message_type())) {
6552     return;
6553   }
6554   // Only message type fields may be lazy.
6555   if (field->options().lazy()) {
6556     if (field->type() != FieldDescriptor::TYPE_MESSAGE) {
6557       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6558                "[lazy = true] can only be specified for submessage fields.");
6559     }
6560   }
6561 
6562   // Only repeated primitive fields may be packed.
6563   if (field->options().packed() && !field->is_packable()) {
6564     AddError(
6565         field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6566         "[packed = true] can only be specified for repeated primitive fields.");
6567   }
6568 
6569   // Note:  Default instance may not yet be initialized here, so we have to
6570   //   avoid reading from it.
6571   if (field->containing_type_ != nullptr &&
6572       &field->containing_type()->options() !=
6573           &MessageOptions::default_instance() &&
6574       field->containing_type()->options().message_set_wire_format()) {
6575     if (field->is_extension()) {
6576       if (!field->is_optional() ||
6577           field->type() != FieldDescriptor::TYPE_MESSAGE) {
6578         AddError(field->full_name(), proto,
6579                  DescriptorPool::ErrorCollector::TYPE,
6580                  "Extensions of MessageSets must be optional messages.");
6581       }
6582     } else {
6583       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6584                "MessageSets cannot have fields, only extensions.");
6585     }
6586   }
6587 
6588   // Lite extensions can only be of Lite types.
6589   if (IsLite(field->file()) && field->containing_type_ != nullptr &&
6590       !IsLite(field->containing_type()->file())) {
6591     AddError(field->full_name(), proto,
6592              DescriptorPool::ErrorCollector::EXTENDEE,
6593              "Extensions to non-lite types can only be declared in non-lite "
6594              "files.  Note that you cannot extend a non-lite type to contain "
6595              "a lite type, but the reverse is allowed.");
6596   }
6597 
6598   // Validate map types.
6599   if (field->is_map()) {
6600     if (!ValidateMapEntry(field, proto)) {
6601       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6602                "map_entry should not be set explicitly. Use map<KeyType, "
6603                "ValueType> instead.");
6604     }
6605   }
6606 
6607   ValidateJSType(field, proto);
6608 
6609   // json_name option is not allowed on extension fields. Note that the
6610   // json_name field in FieldDescriptorProto is always populated by protoc
6611   // when it sends descriptor data to plugins (calculated from field name if
6612   // the option is not explicitly set) so we can't rely on its presence to
6613   // determine whether the json_name option is set on the field. Here we
6614   // compare it against the default calculated json_name value and consider
6615   // the option set if they are different. This won't catch the case when
6616   // an user explicitly sets json_name to the default value, but should be
6617   // good enough to catch common misuses.
6618   if (field->is_extension() &&
6619       (field->has_json_name() &&
6620        field->json_name() != ToJsonName(field->name()))) {
6621     AddError(field->full_name(), proto,
6622              DescriptorPool::ErrorCollector::OPTION_NAME,
6623              "option json_name is not allowed on extension fields.");
6624   }
6625 
6626 }
6627 
ValidateEnumOptions(EnumDescriptor * enm,const EnumDescriptorProto & proto)6628 void DescriptorBuilder::ValidateEnumOptions(EnumDescriptor* enm,
6629                                             const EnumDescriptorProto& proto) {
6630   VALIDATE_OPTIONS_FROM_ARRAY(enm, value, EnumValue);
6631   if (!enm->options().has_allow_alias() || !enm->options().allow_alias()) {
6632     std::map<int, std::string> used_values;
6633     for (int i = 0; i < enm->value_count(); ++i) {
6634       const EnumValueDescriptor* enum_value = enm->value(i);
6635       if (used_values.find(enum_value->number()) != used_values.end()) {
6636         std::string error =
6637             "\"" + enum_value->full_name() +
6638             "\" uses the same enum value as \"" +
6639             used_values[enum_value->number()] +
6640             "\". If this is intended, set "
6641             "'option allow_alias = true;' to the enum definition.";
6642         if (!enm->options().allow_alias()) {
6643           // Generate error if duplicated enum values are explicitly disallowed.
6644           AddError(enm->full_name(), proto.value(i),
6645                    DescriptorPool::ErrorCollector::NUMBER, error);
6646         }
6647       } else {
6648         used_values[enum_value->number()] = enum_value->full_name();
6649       }
6650     }
6651   }
6652 }
6653 
ValidateEnumValueOptions(EnumValueDescriptor *,const EnumValueDescriptorProto &)6654 void DescriptorBuilder::ValidateEnumValueOptions(
6655     EnumValueDescriptor* /* enum_value */,
6656     const EnumValueDescriptorProto& /* proto */) {
6657   // Nothing to do so far.
6658 }
6659 
ValidateExtensionRangeOptions(const std::string & full_name,Descriptor::ExtensionRange * extension_range,const DescriptorProto_ExtensionRange & proto)6660 void DescriptorBuilder::ValidateExtensionRangeOptions(
6661     const std::string& full_name, Descriptor::ExtensionRange* extension_range,
6662     const DescriptorProto_ExtensionRange& proto) {
6663 }
6664 
ValidateServiceOptions(ServiceDescriptor * service,const ServiceDescriptorProto & proto)6665 void DescriptorBuilder::ValidateServiceOptions(
6666     ServiceDescriptor* service, const ServiceDescriptorProto& proto) {
6667   if (IsLite(service->file()) &&
6668       (service->file()->options().cc_generic_services() ||
6669        service->file()->options().java_generic_services())) {
6670     AddError(service->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
6671              "Files with optimize_for = LITE_RUNTIME cannot define services "
6672              "unless you set both options cc_generic_services and "
6673              "java_generic_services to false.");
6674   }
6675 
6676   VALIDATE_OPTIONS_FROM_ARRAY(service, method, Method);
6677 }
6678 
ValidateMethodOptions(MethodDescriptor *,const MethodDescriptorProto &)6679 void DescriptorBuilder::ValidateMethodOptions(
6680     MethodDescriptor* /* method */, const MethodDescriptorProto& /* proto */) {
6681   // Nothing to do so far.
6682 }
6683 
ValidateMapEntry(FieldDescriptor * field,const FieldDescriptorProto & proto)6684 bool DescriptorBuilder::ValidateMapEntry(FieldDescriptor* field,
6685                                          const FieldDescriptorProto& proto) {
6686   const Descriptor* message = field->message_type();
6687   if (  // Must not contain extensions, extension range or nested message or
6688         // enums
6689       message->extension_count() != 0 ||
6690       field->label() != FieldDescriptor::LABEL_REPEATED ||
6691       message->extension_range_count() != 0 ||
6692       message->nested_type_count() != 0 || message->enum_type_count() != 0 ||
6693       // Must contain exactly two fields
6694       message->field_count() != 2 ||
6695       // Field name and message name must match
6696       message->name() != ToCamelCase(field->name(), false) + "Entry" ||
6697       // Entry message must be in the same containing type of the field.
6698       field->containing_type() != message->containing_type()) {
6699     return false;
6700   }
6701 
6702   const FieldDescriptor* key = message->map_key();
6703   const FieldDescriptor* value = message->map_value();
6704   if (key->label() != FieldDescriptor::LABEL_OPTIONAL || key->number() != 1 ||
6705       key->name() != "key") {
6706     return false;
6707   }
6708   if (value->label() != FieldDescriptor::LABEL_OPTIONAL ||
6709       value->number() != 2 || value->name() != "value") {
6710     return false;
6711   }
6712 
6713   // Check key types are legal.
6714   switch (key->type()) {
6715     case FieldDescriptor::TYPE_ENUM:
6716       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6717                "Key in map fields cannot be enum types.");
6718       break;
6719     case FieldDescriptor::TYPE_FLOAT:
6720     case FieldDescriptor::TYPE_DOUBLE:
6721     case FieldDescriptor::TYPE_MESSAGE:
6722     case FieldDescriptor::TYPE_GROUP:
6723     case FieldDescriptor::TYPE_BYTES:
6724       AddError(
6725           field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6726           "Key in map fields cannot be float/double, bytes or message types.");
6727       break;
6728     case FieldDescriptor::TYPE_BOOL:
6729     case FieldDescriptor::TYPE_INT32:
6730     case FieldDescriptor::TYPE_INT64:
6731     case FieldDescriptor::TYPE_SINT32:
6732     case FieldDescriptor::TYPE_SINT64:
6733     case FieldDescriptor::TYPE_STRING:
6734     case FieldDescriptor::TYPE_UINT32:
6735     case FieldDescriptor::TYPE_UINT64:
6736     case FieldDescriptor::TYPE_FIXED32:
6737     case FieldDescriptor::TYPE_FIXED64:
6738     case FieldDescriptor::TYPE_SFIXED32:
6739     case FieldDescriptor::TYPE_SFIXED64:
6740       // Legal cases
6741       break;
6742       // Do not add a default, so that the compiler will complain when new types
6743       // are added.
6744   }
6745 
6746   if (value->type() == FieldDescriptor::TYPE_ENUM) {
6747     if (value->enum_type()->value(0)->number() != 0) {
6748       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6749                "Enum value in map must define 0 as the first value.");
6750     }
6751   }
6752 
6753   return true;
6754 }
6755 
DetectMapConflicts(const Descriptor * message,const DescriptorProto & proto)6756 void DescriptorBuilder::DetectMapConflicts(const Descriptor* message,
6757                                            const DescriptorProto& proto) {
6758   std::map<std::string, const Descriptor*> seen_types;
6759   for (int i = 0; i < message->nested_type_count(); ++i) {
6760     const Descriptor* nested = message->nested_type(i);
6761     std::pair<std::map<std::string, const Descriptor*>::iterator, bool> result =
6762         seen_types.insert(std::make_pair(nested->name(), nested));
6763     if (!result.second) {
6764       if (result.first->second->options().map_entry() ||
6765           nested->options().map_entry()) {
6766         AddError(message->full_name(), proto,
6767                  DescriptorPool::ErrorCollector::NAME,
6768                  "Expanded map entry type " + nested->name() +
6769                      " conflicts with an existing nested message type.");
6770       }
6771     }
6772     // Recursively test on the nested types.
6773     DetectMapConflicts(message->nested_type(i), proto.nested_type(i));
6774   }
6775   // Check for conflicted field names.
6776   for (int i = 0; i < message->field_count(); ++i) {
6777     const FieldDescriptor* field = message->field(i);
6778     std::map<std::string, const Descriptor*>::iterator iter =
6779         seen_types.find(field->name());
6780     if (iter != seen_types.end() && iter->second->options().map_entry()) {
6781       AddError(message->full_name(), proto,
6782                DescriptorPool::ErrorCollector::NAME,
6783                "Expanded map entry type " + iter->second->name() +
6784                    " conflicts with an existing field.");
6785     }
6786   }
6787   // Check for conflicted enum names.
6788   for (int i = 0; i < message->enum_type_count(); ++i) {
6789     const EnumDescriptor* enum_desc = message->enum_type(i);
6790     std::map<std::string, const Descriptor*>::iterator iter =
6791         seen_types.find(enum_desc->name());
6792     if (iter != seen_types.end() && iter->second->options().map_entry()) {
6793       AddError(message->full_name(), proto,
6794                DescriptorPool::ErrorCollector::NAME,
6795                "Expanded map entry type " + iter->second->name() +
6796                    " conflicts with an existing enum type.");
6797     }
6798   }
6799   // Check for conflicted oneof names.
6800   for (int i = 0; i < message->oneof_decl_count(); ++i) {
6801     const OneofDescriptor* oneof_desc = message->oneof_decl(i);
6802     std::map<std::string, const Descriptor*>::iterator iter =
6803         seen_types.find(oneof_desc->name());
6804     if (iter != seen_types.end() && iter->second->options().map_entry()) {
6805       AddError(message->full_name(), proto,
6806                DescriptorPool::ErrorCollector::NAME,
6807                "Expanded map entry type " + iter->second->name() +
6808                    " conflicts with an existing oneof type.");
6809     }
6810   }
6811 }
6812 
ValidateJSType(FieldDescriptor * field,const FieldDescriptorProto & proto)6813 void DescriptorBuilder::ValidateJSType(FieldDescriptor* field,
6814                                        const FieldDescriptorProto& proto) {
6815   FieldOptions::JSType jstype = field->options().jstype();
6816   // The default is always acceptable.
6817   if (jstype == FieldOptions::JS_NORMAL) {
6818     return;
6819   }
6820 
6821   switch (field->type()) {
6822     // Integral 64-bit types may be represented as JavaScript numbers or
6823     // strings.
6824     case FieldDescriptor::TYPE_UINT64:
6825     case FieldDescriptor::TYPE_INT64:
6826     case FieldDescriptor::TYPE_SINT64:
6827     case FieldDescriptor::TYPE_FIXED64:
6828     case FieldDescriptor::TYPE_SFIXED64:
6829       if (jstype == FieldOptions::JS_STRING ||
6830           jstype == FieldOptions::JS_NUMBER) {
6831         return;
6832       }
6833       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6834                "Illegal jstype for int64, uint64, sint64, fixed64 "
6835                "or sfixed64 field: " +
6836                    FieldOptions_JSType_descriptor()->value(jstype)->name());
6837       break;
6838 
6839     // No other types permit a jstype option.
6840     default:
6841       AddError(field->full_name(), proto, DescriptorPool::ErrorCollector::TYPE,
6842                "jstype is only allowed on int64, uint64, sint64, fixed64 "
6843                "or sfixed64 fields.");
6844       break;
6845   }
6846 }
6847 
6848 #undef VALIDATE_OPTIONS_FROM_ARRAY
6849 
6850 // -------------------------------------------------------------------
6851 
OptionInterpreter(DescriptorBuilder * builder)6852 DescriptorBuilder::OptionInterpreter::OptionInterpreter(
6853     DescriptorBuilder* builder)
6854     : builder_(builder) {
6855   GOOGLE_CHECK(builder_);
6856 }
6857 
~OptionInterpreter()6858 DescriptorBuilder::OptionInterpreter::~OptionInterpreter() {}
6859 
InterpretOptions(OptionsToInterpret * options_to_interpret)6860 bool DescriptorBuilder::OptionInterpreter::InterpretOptions(
6861     OptionsToInterpret* options_to_interpret) {
6862   // Note that these may be in different pools, so we can't use the same
6863   // descriptor and reflection objects on both.
6864   Message* options = options_to_interpret->options;
6865   const Message* original_options = options_to_interpret->original_options;
6866 
6867   bool failed = false;
6868   options_to_interpret_ = options_to_interpret;
6869 
6870   // Find the uninterpreted_option field in the mutable copy of the options
6871   // and clear them, since we're about to interpret them.
6872   const FieldDescriptor* uninterpreted_options_field =
6873       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
6874   GOOGLE_CHECK(uninterpreted_options_field != nullptr)
6875       << "No field named \"uninterpreted_option\" in the Options proto.";
6876   options->GetReflection()->ClearField(options, uninterpreted_options_field);
6877 
6878   std::vector<int> src_path = options_to_interpret->element_path;
6879   src_path.push_back(uninterpreted_options_field->number());
6880 
6881   // Find the uninterpreted_option field in the original options.
6882   const FieldDescriptor* original_uninterpreted_options_field =
6883       original_options->GetDescriptor()->FindFieldByName(
6884           "uninterpreted_option");
6885   GOOGLE_CHECK(original_uninterpreted_options_field != nullptr)
6886       << "No field named \"uninterpreted_option\" in the Options proto.";
6887 
6888   const int num_uninterpreted_options =
6889       original_options->GetReflection()->FieldSize(
6890           *original_options, original_uninterpreted_options_field);
6891   for (int i = 0; i < num_uninterpreted_options; ++i) {
6892     src_path.push_back(i);
6893     uninterpreted_option_ = down_cast<const UninterpretedOption*>(
6894         &original_options->GetReflection()->GetRepeatedMessage(
6895             *original_options, original_uninterpreted_options_field, i));
6896     if (!InterpretSingleOption(options, src_path,
6897                                options_to_interpret->element_path)) {
6898       // Error already added by InterpretSingleOption().
6899       failed = true;
6900       break;
6901     }
6902     src_path.pop_back();
6903   }
6904   // Reset these, so we don't have any dangling pointers.
6905   uninterpreted_option_ = nullptr;
6906   options_to_interpret_ = nullptr;
6907 
6908   if (!failed) {
6909     // InterpretSingleOption() added the interpreted options in the
6910     // UnknownFieldSet, in case the option isn't yet known to us.  Now we
6911     // serialize the options message and deserialize it back.  That way, any
6912     // option fields that we do happen to know about will get moved from the
6913     // UnknownFieldSet into the real fields, and thus be available right away.
6914     // If they are not known, that's OK too. They will get reparsed into the
6915     // UnknownFieldSet and wait there until the message is parsed by something
6916     // that does know about the options.
6917 
6918     // Keep the unparsed options around in case the reparsing fails.
6919     std::unique_ptr<Message> unparsed_options(options->New());
6920     options->GetReflection()->Swap(unparsed_options.get(), options);
6921 
6922     std::string buf;
6923     if (!unparsed_options->AppendToString(&buf) ||
6924         !options->ParseFromString(buf)) {
6925       builder_->AddError(
6926           options_to_interpret->element_name, *original_options,
6927           DescriptorPool::ErrorCollector::OTHER,
6928           "Some options could not be correctly parsed using the proto "
6929           "descriptors compiled into this binary.\n"
6930           "Unparsed options: " +
6931               unparsed_options->ShortDebugString() +
6932               "\n"
6933               "Parsing attempt:  " +
6934               options->ShortDebugString());
6935       // Restore the unparsed options.
6936       options->GetReflection()->Swap(unparsed_options.get(), options);
6937     }
6938   }
6939 
6940   return !failed;
6941 }
6942 
InterpretSingleOption(Message * options,const std::vector<int> & src_path,const std::vector<int> & options_path)6943 bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
6944     Message* options, const std::vector<int>& src_path,
6945     const std::vector<int>& options_path) {
6946   // First do some basic validation.
6947   if (uninterpreted_option_->name_size() == 0) {
6948     // This should never happen unless the parser has gone seriously awry or
6949     // someone has manually created the uninterpreted option badly.
6950     return AddNameError("Option must have a name.");
6951   }
6952   if (uninterpreted_option_->name(0).name_part() == "uninterpreted_option") {
6953     return AddNameError(
6954         "Option must not use reserved name "
6955         "\"uninterpreted_option\".");
6956   }
6957 
6958   const Descriptor* options_descriptor = nullptr;
6959   // Get the options message's descriptor from the builder's pool, so that we
6960   // get the version that knows about any extension options declared in the file
6961   // we're currently building. The descriptor should be there as long as the
6962   // file we're building imported descriptor.proto.
6963 
6964   // Note that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
6965   // DescriptorPool::FindMessageTypeByName() because we're already holding the
6966   // pool's mutex, and the latter method locks it again.  We don't use
6967   // FindSymbol() because files that use custom options only need to depend on
6968   // the file that defines the option, not descriptor.proto itself.
6969   Symbol symbol = builder_->FindSymbolNotEnforcingDeps(
6970       options->GetDescriptor()->full_name());
6971   options_descriptor = symbol.descriptor();
6972   if (options_descriptor == nullptr) {
6973     // The options message's descriptor was not in the builder's pool, so use
6974     // the standard version from the generated pool. We're not holding the
6975     // generated pool's mutex, so we can search it the straightforward way.
6976     options_descriptor = options->GetDescriptor();
6977   }
6978   GOOGLE_CHECK(options_descriptor);
6979 
6980   // We iterate over the name parts to drill into the submessages until we find
6981   // the leaf field for the option. As we drill down we remember the current
6982   // submessage's descriptor in |descriptor| and the next field in that
6983   // submessage in |field|. We also track the fields we're drilling down
6984   // through in |intermediate_fields|. As we go, we reconstruct the full option
6985   // name in |debug_msg_name|, for use in error messages.
6986   const Descriptor* descriptor = options_descriptor;
6987   const FieldDescriptor* field = nullptr;
6988   std::vector<const FieldDescriptor*> intermediate_fields;
6989   std::string debug_msg_name = "";
6990 
6991   std::vector<int> dest_path = options_path;
6992 
6993   for (int i = 0; i < uninterpreted_option_->name_size(); ++i) {
6994     builder_->undefine_resolved_name_.clear();
6995     const std::string& name_part = uninterpreted_option_->name(i).name_part();
6996     if (debug_msg_name.size() > 0) {
6997       debug_msg_name += ".";
6998     }
6999     if (uninterpreted_option_->name(i).is_extension()) {
7000       debug_msg_name += "(" + name_part + ")";
7001       // Search for the extension's descriptor as an extension in the builder's
7002       // pool. Note that we use DescriptorBuilder::LookupSymbol(), not
7003       // DescriptorPool::FindExtensionByName(), for two reasons: 1) It allows
7004       // relative lookups, and 2) because we're already holding the pool's
7005       // mutex, and the latter method locks it again.
7006       symbol =
7007           builder_->LookupSymbol(name_part, options_to_interpret_->name_scope);
7008       field = symbol.field_descriptor();
7009       // If we don't find the field then the field's descriptor was not in the
7010       // builder's pool, but there's no point in looking in the generated
7011       // pool. We require that you import the file that defines any extensions
7012       // you use, so they must be present in the builder's pool.
7013     } else {
7014       debug_msg_name += name_part;
7015       // Search for the field's descriptor as a regular field.
7016       field = descriptor->FindFieldByName(name_part);
7017     }
7018 
7019     if (field == nullptr) {
7020       if (get_allow_unknown(builder_->pool_)) {
7021         // We can't find the option, but AllowUnknownDependencies() is enabled,
7022         // so we will just leave it as uninterpreted.
7023         AddWithoutInterpreting(*uninterpreted_option_, options);
7024         return true;
7025       } else if (!(builder_->undefine_resolved_name_).empty()) {
7026         // Option is resolved to a name which is not defined.
7027         return AddNameError(
7028             "Option \"" + debug_msg_name + "\" is resolved to \"(" +
7029             builder_->undefine_resolved_name_ +
7030             ")\", which is not defined. The innermost scope is searched first "
7031             "in name resolution. Consider using a leading '.'(i.e., \"(." +
7032             debug_msg_name.substr(1) +
7033             "\") to start from the outermost scope.");
7034       } else {
7035         return AddNameError(
7036             "Option \"" + debug_msg_name +
7037             "\" unknown. Ensure that your proto" +
7038             " definition file imports the proto which defines the option.");
7039       }
7040     } else if (field->containing_type() != descriptor) {
7041       if (get_is_placeholder(field->containing_type())) {
7042         // The field is an extension of a placeholder type, so we can't
7043         // reliably verify whether it is a valid extension to use here (e.g.
7044         // we don't know if it is an extension of the correct *Options message,
7045         // or if it has a valid field number, etc.).  Just leave it as
7046         // uninterpreted instead.
7047         AddWithoutInterpreting(*uninterpreted_option_, options);
7048         return true;
7049       } else {
7050         // This can only happen if, due to some insane misconfiguration of the
7051         // pools, we find the options message in one pool but the field in
7052         // another. This would probably imply a hefty bug somewhere.
7053         return AddNameError("Option field \"" + debug_msg_name +
7054                             "\" is not a field or extension of message \"" +
7055                             descriptor->name() + "\".");
7056       }
7057     } else {
7058       // accumulate field numbers to form path to interpreted option
7059       dest_path.push_back(field->number());
7060 
7061       if (i < uninterpreted_option_->name_size() - 1) {
7062         if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
7063           return AddNameError("Option \"" + debug_msg_name +
7064                               "\" is an atomic type, not a message.");
7065         } else if (field->is_repeated()) {
7066           return AddNameError("Option field \"" + debug_msg_name +
7067                               "\" is a repeated message. Repeated message "
7068                               "options must be initialized using an "
7069                               "aggregate value.");
7070         } else {
7071           // Drill down into the submessage.
7072           intermediate_fields.push_back(field);
7073           descriptor = field->message_type();
7074         }
7075       }
7076     }
7077   }
7078 
7079   // We've found the leaf field. Now we use UnknownFieldSets to set its value
7080   // on the options message. We do so because the message may not yet know
7081   // about its extension fields, so we may not be able to set the fields
7082   // directly. But the UnknownFieldSets will serialize to the same wire-format
7083   // message, so reading that message back in once the extension fields are
7084   // known will populate them correctly.
7085 
7086   // First see if the option is already set.
7087   if (!field->is_repeated() &&
7088       !ExamineIfOptionIsSet(
7089           intermediate_fields.begin(), intermediate_fields.end(), field,
7090           debug_msg_name,
7091           options->GetReflection()->GetUnknownFields(*options))) {
7092     return false;  // ExamineIfOptionIsSet() already added the error.
7093   }
7094 
7095   // First set the value on the UnknownFieldSet corresponding to the
7096   // innermost message.
7097   std::unique_ptr<UnknownFieldSet> unknown_fields(new UnknownFieldSet());
7098   if (!SetOptionValue(field, unknown_fields.get())) {
7099     return false;  // SetOptionValue() already added the error.
7100   }
7101 
7102   // Now wrap the UnknownFieldSet with UnknownFieldSets corresponding to all
7103   // the intermediate messages.
7104   for (std::vector<const FieldDescriptor*>::reverse_iterator iter =
7105            intermediate_fields.rbegin();
7106        iter != intermediate_fields.rend(); ++iter) {
7107     std::unique_ptr<UnknownFieldSet> parent_unknown_fields(
7108         new UnknownFieldSet());
7109     switch ((*iter)->type()) {
7110       case FieldDescriptor::TYPE_MESSAGE: {
7111         io::StringOutputStream outstr(
7112             parent_unknown_fields->AddLengthDelimited((*iter)->number()));
7113         io::CodedOutputStream out(&outstr);
7114         internal::WireFormat::SerializeUnknownFields(*unknown_fields, &out);
7115         GOOGLE_CHECK(!out.HadError())
7116             << "Unexpected failure while serializing option submessage "
7117             << debug_msg_name << "\".";
7118         break;
7119       }
7120 
7121       case FieldDescriptor::TYPE_GROUP: {
7122         parent_unknown_fields->AddGroup((*iter)->number())
7123             ->MergeFrom(*unknown_fields);
7124         break;
7125       }
7126 
7127       default:
7128         GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: "
7129                    << (*iter)->type();
7130         return false;
7131     }
7132     unknown_fields.reset(parent_unknown_fields.release());
7133   }
7134 
7135   // Now merge the UnknownFieldSet corresponding to the top-level message into
7136   // the options message.
7137   options->GetReflection()->MutableUnknownFields(options)->MergeFrom(
7138       *unknown_fields);
7139 
7140   // record the element path of the interpreted option
7141   if (field->is_repeated()) {
7142     int index = repeated_option_counts_[dest_path]++;
7143     dest_path.push_back(index);
7144   }
7145   interpreted_paths_[src_path] = dest_path;
7146 
7147   return true;
7148 }
7149 
UpdateSourceCodeInfo(SourceCodeInfo * info)7150 void DescriptorBuilder::OptionInterpreter::UpdateSourceCodeInfo(
7151     SourceCodeInfo* info) {
7152   if (interpreted_paths_.empty()) {
7153     // nothing to do!
7154     return;
7155   }
7156 
7157   // We find locations that match keys in interpreted_paths_ and
7158   // 1) replace the path with the corresponding value in interpreted_paths_
7159   // 2) remove any subsequent sub-locations (sub-location is one whose path
7160   //    has the parent path as a prefix)
7161   //
7162   // To avoid quadratic behavior of removing interior rows as we go,
7163   // we keep a copy. But we don't actually copy anything until we've
7164   // found the first match (so if the source code info has no locations
7165   // that need to be changed, there is zero copy overhead).
7166 
7167   RepeatedPtrField<SourceCodeInfo_Location>* locs = info->mutable_location();
7168   RepeatedPtrField<SourceCodeInfo_Location> new_locs;
7169   bool copying = false;
7170 
7171   std::vector<int> pathv;
7172   bool matched = false;
7173 
7174   for (RepeatedPtrField<SourceCodeInfo_Location>::iterator loc = locs->begin();
7175        loc != locs->end(); loc++) {
7176     if (matched) {
7177       // see if this location is in the range to remove
7178       bool loc_matches = true;
7179       if (loc->path_size() < static_cast<int64_t>(pathv.size())) {
7180         loc_matches = false;
7181       } else {
7182         for (size_t j = 0; j < pathv.size(); j++) {
7183           if (loc->path(j) != pathv[j]) {
7184             loc_matches = false;
7185             break;
7186           }
7187         }
7188       }
7189 
7190       if (loc_matches) {
7191         // don't copy this row since it is a sub-location that we're removing
7192         continue;
7193       }
7194 
7195       matched = false;
7196     }
7197 
7198     pathv.clear();
7199     for (int j = 0; j < loc->path_size(); j++) {
7200       pathv.push_back(loc->path(j));
7201     }
7202 
7203     std::map<std::vector<int>, std::vector<int>>::iterator entry =
7204         interpreted_paths_.find(pathv);
7205 
7206     if (entry == interpreted_paths_.end()) {
7207       // not a match
7208       if (copying) {
7209         *new_locs.Add() = *loc;
7210       }
7211       continue;
7212     }
7213 
7214     matched = true;
7215 
7216     if (!copying) {
7217       // initialize the copy we are building
7218       copying = true;
7219       new_locs.Reserve(locs->size());
7220       for (RepeatedPtrField<SourceCodeInfo_Location>::iterator it =
7221                locs->begin();
7222            it != loc; it++) {
7223         *new_locs.Add() = *it;
7224       }
7225     }
7226 
7227     // add replacement and update its path
7228     SourceCodeInfo_Location* replacement = new_locs.Add();
7229     *replacement = *loc;
7230     replacement->clear_path();
7231     for (std::vector<int>::iterator rit = entry->second.begin();
7232          rit != entry->second.end(); rit++) {
7233       replacement->add_path(*rit);
7234     }
7235   }
7236 
7237   // if we made a changed copy, put it in place
7238   if (copying) {
7239     *locs = new_locs;
7240   }
7241 }
7242 
AddWithoutInterpreting(const UninterpretedOption & uninterpreted_option,Message * options)7243 void DescriptorBuilder::OptionInterpreter::AddWithoutInterpreting(
7244     const UninterpretedOption& uninterpreted_option, Message* options) {
7245   const FieldDescriptor* field =
7246       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
7247   GOOGLE_CHECK(field != nullptr);
7248 
7249   options->GetReflection()
7250       ->AddMessage(options, field)
7251       ->CopyFrom(uninterpreted_option);
7252 }
7253 
ExamineIfOptionIsSet(std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_iter,std::vector<const FieldDescriptor * >::const_iterator intermediate_fields_end,const FieldDescriptor * innermost_field,const std::string & debug_msg_name,const UnknownFieldSet & unknown_fields)7254 bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
7255     std::vector<const FieldDescriptor*>::const_iterator
7256         intermediate_fields_iter,
7257     std::vector<const FieldDescriptor*>::const_iterator intermediate_fields_end,
7258     const FieldDescriptor* innermost_field, const std::string& debug_msg_name,
7259     const UnknownFieldSet& unknown_fields) {
7260   // We do linear searches of the UnknownFieldSet and its sub-groups.  This
7261   // should be fine since it's unlikely that any one options structure will
7262   // contain more than a handful of options.
7263 
7264   if (intermediate_fields_iter == intermediate_fields_end) {
7265     // We're at the innermost submessage.
7266     for (int i = 0; i < unknown_fields.field_count(); i++) {
7267       if (unknown_fields.field(i).number() == innermost_field->number()) {
7268         return AddNameError("Option \"" + debug_msg_name +
7269                             "\" was already set.");
7270       }
7271     }
7272     return true;
7273   }
7274 
7275   for (int i = 0; i < unknown_fields.field_count(); i++) {
7276     if (unknown_fields.field(i).number() ==
7277         (*intermediate_fields_iter)->number()) {
7278       const UnknownField* unknown_field = &unknown_fields.field(i);
7279       FieldDescriptor::Type type = (*intermediate_fields_iter)->type();
7280       // Recurse into the next submessage.
7281       switch (type) {
7282         case FieldDescriptor::TYPE_MESSAGE:
7283           if (unknown_field->type() == UnknownField::TYPE_LENGTH_DELIMITED) {
7284             UnknownFieldSet intermediate_unknown_fields;
7285             if (intermediate_unknown_fields.ParseFromString(
7286                     unknown_field->length_delimited()) &&
7287                 !ExamineIfOptionIsSet(intermediate_fields_iter + 1,
7288                                       intermediate_fields_end, innermost_field,
7289                                       debug_msg_name,
7290                                       intermediate_unknown_fields)) {
7291               return false;  // Error already added.
7292             }
7293           }
7294           break;
7295 
7296         case FieldDescriptor::TYPE_GROUP:
7297           if (unknown_field->type() == UnknownField::TYPE_GROUP) {
7298             if (!ExamineIfOptionIsSet(intermediate_fields_iter + 1,
7299                                       intermediate_fields_end, innermost_field,
7300                                       debug_msg_name, unknown_field->group())) {
7301               return false;  // Error already added.
7302             }
7303           }
7304           break;
7305 
7306         default:
7307           GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_MESSAGE: " << type;
7308           return false;
7309       }
7310     }
7311   }
7312   return true;
7313 }
7314 
SetOptionValue(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)7315 bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
7316     const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
7317   // We switch on the CppType to validate.
7318   switch (option_field->cpp_type()) {
7319     case FieldDescriptor::CPPTYPE_INT32:
7320       if (uninterpreted_option_->has_positive_int_value()) {
7321         if (uninterpreted_option_->positive_int_value() >
7322             static_cast<uint64_t>(kint32max)) {
7323           return AddValueError("Value out of range for int32 option \"" +
7324                                option_field->full_name() + "\".");
7325         } else {
7326           SetInt32(option_field->number(),
7327                    uninterpreted_option_->positive_int_value(),
7328                    option_field->type(), unknown_fields);
7329         }
7330       } else if (uninterpreted_option_->has_negative_int_value()) {
7331         if (uninterpreted_option_->negative_int_value() <
7332             static_cast<int64_t>(kint32min)) {
7333           return AddValueError("Value out of range for int32 option \"" +
7334                                option_field->full_name() + "\".");
7335         } else {
7336           SetInt32(option_field->number(),
7337                    uninterpreted_option_->negative_int_value(),
7338                    option_field->type(), unknown_fields);
7339         }
7340       } else {
7341         return AddValueError("Value must be integer for int32 option \"" +
7342                              option_field->full_name() + "\".");
7343       }
7344       break;
7345 
7346     case FieldDescriptor::CPPTYPE_INT64:
7347       if (uninterpreted_option_->has_positive_int_value()) {
7348         if (uninterpreted_option_->positive_int_value() >
7349             static_cast<uint64_t>(kint64max)) {
7350           return AddValueError("Value out of range for int64 option \"" +
7351                                option_field->full_name() + "\".");
7352         } else {
7353           SetInt64(option_field->number(),
7354                    uninterpreted_option_->positive_int_value(),
7355                    option_field->type(), unknown_fields);
7356         }
7357       } else if (uninterpreted_option_->has_negative_int_value()) {
7358         SetInt64(option_field->number(),
7359                  uninterpreted_option_->negative_int_value(),
7360                  option_field->type(), unknown_fields);
7361       } else {
7362         return AddValueError("Value must be integer for int64 option \"" +
7363                              option_field->full_name() + "\".");
7364       }
7365       break;
7366 
7367     case FieldDescriptor::CPPTYPE_UINT32:
7368       if (uninterpreted_option_->has_positive_int_value()) {
7369         if (uninterpreted_option_->positive_int_value() > kuint32max) {
7370           return AddValueError("Value out of range for uint32 option \"" +
7371                                option_field->name() + "\".");
7372         } else {
7373           SetUInt32(option_field->number(),
7374                     uninterpreted_option_->positive_int_value(),
7375                     option_field->type(), unknown_fields);
7376         }
7377       } else {
7378         return AddValueError(
7379             "Value must be non-negative integer for uint32 "
7380             "option \"" +
7381             option_field->full_name() + "\".");
7382       }
7383       break;
7384 
7385     case FieldDescriptor::CPPTYPE_UINT64:
7386       if (uninterpreted_option_->has_positive_int_value()) {
7387         SetUInt64(option_field->number(),
7388                   uninterpreted_option_->positive_int_value(),
7389                   option_field->type(), unknown_fields);
7390       } else {
7391         return AddValueError(
7392             "Value must be non-negative integer for uint64 "
7393             "option \"" +
7394             option_field->full_name() + "\".");
7395       }
7396       break;
7397 
7398     case FieldDescriptor::CPPTYPE_FLOAT: {
7399       float value;
7400       if (uninterpreted_option_->has_double_value()) {
7401         value = uninterpreted_option_->double_value();
7402       } else if (uninterpreted_option_->has_positive_int_value()) {
7403         value = uninterpreted_option_->positive_int_value();
7404       } else if (uninterpreted_option_->has_negative_int_value()) {
7405         value = uninterpreted_option_->negative_int_value();
7406       } else {
7407         return AddValueError("Value must be number for float option \"" +
7408                              option_field->full_name() + "\".");
7409       }
7410       unknown_fields->AddFixed32(option_field->number(),
7411                                  internal::WireFormatLite::EncodeFloat(value));
7412       break;
7413     }
7414 
7415     case FieldDescriptor::CPPTYPE_DOUBLE: {
7416       double value;
7417       if (uninterpreted_option_->has_double_value()) {
7418         value = uninterpreted_option_->double_value();
7419       } else if (uninterpreted_option_->has_positive_int_value()) {
7420         value = uninterpreted_option_->positive_int_value();
7421       } else if (uninterpreted_option_->has_negative_int_value()) {
7422         value = uninterpreted_option_->negative_int_value();
7423       } else {
7424         return AddValueError("Value must be number for double option \"" +
7425                              option_field->full_name() + "\".");
7426       }
7427       unknown_fields->AddFixed64(option_field->number(),
7428                                  internal::WireFormatLite::EncodeDouble(value));
7429       break;
7430     }
7431 
7432     case FieldDescriptor::CPPTYPE_BOOL:
7433       uint64_t value;
7434       if (!uninterpreted_option_->has_identifier_value()) {
7435         return AddValueError(
7436             "Value must be identifier for boolean option "
7437             "\"" +
7438             option_field->full_name() + "\".");
7439       }
7440       if (uninterpreted_option_->identifier_value() == "true") {
7441         value = 1;
7442       } else if (uninterpreted_option_->identifier_value() == "false") {
7443         value = 0;
7444       } else {
7445         return AddValueError(
7446             "Value must be \"true\" or \"false\" for boolean "
7447             "option \"" +
7448             option_field->full_name() + "\".");
7449       }
7450       unknown_fields->AddVarint(option_field->number(), value);
7451       break;
7452 
7453     case FieldDescriptor::CPPTYPE_ENUM: {
7454       if (!uninterpreted_option_->has_identifier_value()) {
7455         return AddValueError(
7456             "Value must be identifier for enum-valued option "
7457             "\"" +
7458             option_field->full_name() + "\".");
7459       }
7460       const EnumDescriptor* enum_type = option_field->enum_type();
7461       const std::string& value_name = uninterpreted_option_->identifier_value();
7462       const EnumValueDescriptor* enum_value = nullptr;
7463 
7464       if (enum_type->file()->pool() != DescriptorPool::generated_pool()) {
7465         // Note that the enum value's fully-qualified name is a sibling of the
7466         // enum's name, not a child of it.
7467         std::string fully_qualified_name = enum_type->full_name();
7468         fully_qualified_name.resize(fully_qualified_name.size() -
7469                                     enum_type->name().size());
7470         fully_qualified_name += value_name;
7471 
7472         // Search for the enum value's descriptor in the builder's pool. Note
7473         // that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
7474         // DescriptorPool::FindEnumValueByName() because we're already holding
7475         // the pool's mutex, and the latter method locks it again.
7476         Symbol symbol =
7477             builder_->FindSymbolNotEnforcingDeps(fully_qualified_name);
7478         if (auto* candicate_descriptor = symbol.enum_value_descriptor()) {
7479           if (candicate_descriptor->type() != enum_type) {
7480             return AddValueError(
7481                 "Enum type \"" + enum_type->full_name() +
7482                 "\" has no value named \"" + value_name + "\" for option \"" +
7483                 option_field->full_name() +
7484                 "\". This appears to be a value from a sibling type.");
7485           } else {
7486             enum_value = candicate_descriptor;
7487           }
7488         }
7489       } else {
7490         // The enum type is in the generated pool, so we can search for the
7491         // value there.
7492         enum_value = enum_type->FindValueByName(value_name);
7493       }
7494 
7495       if (enum_value == nullptr) {
7496         return AddValueError("Enum type \"" +
7497                              option_field->enum_type()->full_name() +
7498                              "\" has no value named \"" + value_name +
7499                              "\" for "
7500                              "option \"" +
7501                              option_field->full_name() + "\".");
7502       } else {
7503         // Sign-extension is not a problem, since we cast directly from int32_t
7504         // to uint64_t, without first going through uint32_t.
7505         unknown_fields->AddVarint(
7506             option_field->number(),
7507             static_cast<uint64_t>(static_cast<int64_t>(enum_value->number())));
7508       }
7509       break;
7510     }
7511 
7512     case FieldDescriptor::CPPTYPE_STRING:
7513       if (!uninterpreted_option_->has_string_value()) {
7514         return AddValueError(
7515             "Value must be quoted string for string option "
7516             "\"" +
7517             option_field->full_name() + "\".");
7518       }
7519       // The string has already been unquoted and unescaped by the parser.
7520       unknown_fields->AddLengthDelimited(option_field->number(),
7521                                          uninterpreted_option_->string_value());
7522       break;
7523 
7524     case FieldDescriptor::CPPTYPE_MESSAGE:
7525       if (!SetAggregateOption(option_field, unknown_fields)) {
7526         return false;
7527       }
7528       break;
7529   }
7530 
7531   return true;
7532 }
7533 
7534 class DescriptorBuilder::OptionInterpreter::AggregateOptionFinder
7535     : public TextFormat::Finder {
7536  public:
7537   DescriptorBuilder* builder_;
7538 
FindAnyType(const Message & message,const std::string & prefix,const std::string & name) const7539   const Descriptor* FindAnyType(const Message& message,
7540                                 const std::string& prefix,
7541                                 const std::string& name) const override {
7542     if (prefix != internal::kTypeGoogleApisComPrefix &&
7543         prefix != internal::kTypeGoogleProdComPrefix) {
7544       return nullptr;
7545     }
7546     assert_mutex_held(builder_->pool_);
7547     return builder_->FindSymbol(name).descriptor();
7548   }
7549 
FindExtension(Message * message,const std::string & name) const7550   const FieldDescriptor* FindExtension(Message* message,
7551                                        const std::string& name) const override {
7552     assert_mutex_held(builder_->pool_);
7553     const Descriptor* descriptor = message->GetDescriptor();
7554     Symbol result =
7555         builder_->LookupSymbolNoPlaceholder(name, descriptor->full_name());
7556     if (auto* field = result.field_descriptor()) {
7557       return field;
7558     } else if (result.type() == Symbol::MESSAGE &&
7559                descriptor->options().message_set_wire_format()) {
7560       const Descriptor* foreign_type = result.descriptor();
7561       // The text format allows MessageSet items to be specified using
7562       // the type name, rather than the extension identifier. If the symbol
7563       // lookup returned a Message, and the enclosing Message has
7564       // message_set_wire_format = true, then return the message set
7565       // extension, if one exists.
7566       for (int i = 0; i < foreign_type->extension_count(); i++) {
7567         const FieldDescriptor* extension = foreign_type->extension(i);
7568         if (extension->containing_type() == descriptor &&
7569             extension->type() == FieldDescriptor::TYPE_MESSAGE &&
7570             extension->is_optional() &&
7571             extension->message_type() == foreign_type) {
7572           // Found it.
7573           return extension;
7574         }
7575       }
7576     }
7577     return nullptr;
7578   }
7579 };
7580 
7581 // A custom error collector to record any text-format parsing errors
7582 namespace {
7583 class AggregateErrorCollector : public io::ErrorCollector {
7584  public:
7585   std::string error_;
7586 
AddError(int,int,const std::string & message)7587   void AddError(int /* line */, int /* column */,
7588                 const std::string& message) override {
7589     if (!error_.empty()) {
7590       error_ += "; ";
7591     }
7592     error_ += message;
7593   }
7594 
AddWarning(int,int,const std::string &)7595   void AddWarning(int /* line */, int /* column */,
7596                   const std::string& /* message */) override {
7597     // Ignore warnings
7598   }
7599 };
7600 }  // namespace
7601 
7602 // We construct a dynamic message of the type corresponding to
7603 // option_field, parse the supplied text-format string into this
7604 // message, and serialize the resulting message to produce the value.
SetAggregateOption(const FieldDescriptor * option_field,UnknownFieldSet * unknown_fields)7605 bool DescriptorBuilder::OptionInterpreter::SetAggregateOption(
7606     const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
7607   if (!uninterpreted_option_->has_aggregate_value()) {
7608     return AddValueError("Option \"" + option_field->full_name() +
7609                          "\" is a message. To set the entire message, use "
7610                          "syntax like \"" +
7611                          option_field->name() +
7612                          " = { <proto text format> }\". "
7613                          "To set fields within it, use "
7614                          "syntax like \"" +
7615                          option_field->name() + ".foo = value\".");
7616   }
7617 
7618   const Descriptor* type = option_field->message_type();
7619   std::unique_ptr<Message> dynamic(dynamic_factory_.GetPrototype(type)->New());
7620   GOOGLE_CHECK(dynamic.get() != nullptr)
7621       << "Could not create an instance of " << option_field->DebugString();
7622 
7623   AggregateErrorCollector collector;
7624   AggregateOptionFinder finder;
7625   finder.builder_ = builder_;
7626   TextFormat::Parser parser;
7627   parser.RecordErrorsTo(&collector);
7628   parser.SetFinder(&finder);
7629   if (!parser.ParseFromString(uninterpreted_option_->aggregate_value(),
7630                               dynamic.get())) {
7631     AddValueError("Error while parsing option value for \"" +
7632                   option_field->name() + "\": " + collector.error_);
7633     return false;
7634   } else {
7635     std::string serial;
7636     dynamic->SerializeToString(&serial);  // Never fails
7637     if (option_field->type() == FieldDescriptor::TYPE_MESSAGE) {
7638       unknown_fields->AddLengthDelimited(option_field->number(), serial);
7639     } else {
7640       GOOGLE_CHECK_EQ(option_field->type(), FieldDescriptor::TYPE_GROUP);
7641       UnknownFieldSet* group = unknown_fields->AddGroup(option_field->number());
7642       group->ParseFromString(serial);
7643     }
7644     return true;
7645   }
7646 }
7647 
SetInt32(int number,int32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)7648 void DescriptorBuilder::OptionInterpreter::SetInt32(
7649     int number, int32_t value, FieldDescriptor::Type type,
7650     UnknownFieldSet* unknown_fields) {
7651   switch (type) {
7652     case FieldDescriptor::TYPE_INT32:
7653       unknown_fields->AddVarint(
7654           number, static_cast<uint64_t>(static_cast<int64_t>(value)));
7655       break;
7656 
7657     case FieldDescriptor::TYPE_SFIXED32:
7658       unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
7659       break;
7660 
7661     case FieldDescriptor::TYPE_SINT32:
7662       unknown_fields->AddVarint(
7663           number, internal::WireFormatLite::ZigZagEncode32(value));
7664       break;
7665 
7666     default:
7667       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT32: " << type;
7668       break;
7669   }
7670 }
7671 
SetInt64(int number,int64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)7672 void DescriptorBuilder::OptionInterpreter::SetInt64(
7673     int number, int64_t value, FieldDescriptor::Type type,
7674     UnknownFieldSet* unknown_fields) {
7675   switch (type) {
7676     case FieldDescriptor::TYPE_INT64:
7677       unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
7678       break;
7679 
7680     case FieldDescriptor::TYPE_SFIXED64:
7681       unknown_fields->AddFixed64(number, static_cast<uint64_t>(value));
7682       break;
7683 
7684     case FieldDescriptor::TYPE_SINT64:
7685       unknown_fields->AddVarint(
7686           number, internal::WireFormatLite::ZigZagEncode64(value));
7687       break;
7688 
7689     default:
7690       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_INT64: " << type;
7691       break;
7692   }
7693 }
7694 
SetUInt32(int number,uint32_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)7695 void DescriptorBuilder::OptionInterpreter::SetUInt32(
7696     int number, uint32_t value, FieldDescriptor::Type type,
7697     UnknownFieldSet* unknown_fields) {
7698   switch (type) {
7699     case FieldDescriptor::TYPE_UINT32:
7700       unknown_fields->AddVarint(number, static_cast<uint64_t>(value));
7701       break;
7702 
7703     case FieldDescriptor::TYPE_FIXED32:
7704       unknown_fields->AddFixed32(number, static_cast<uint32_t>(value));
7705       break;
7706 
7707     default:
7708       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT32: " << type;
7709       break;
7710   }
7711 }
7712 
SetUInt64(int number,uint64_t value,FieldDescriptor::Type type,UnknownFieldSet * unknown_fields)7713 void DescriptorBuilder::OptionInterpreter::SetUInt64(
7714     int number, uint64_t value, FieldDescriptor::Type type,
7715     UnknownFieldSet* unknown_fields) {
7716   switch (type) {
7717     case FieldDescriptor::TYPE_UINT64:
7718       unknown_fields->AddVarint(number, value);
7719       break;
7720 
7721     case FieldDescriptor::TYPE_FIXED64:
7722       unknown_fields->AddFixed64(number, value);
7723       break;
7724 
7725     default:
7726       GOOGLE_LOG(FATAL) << "Invalid wire type for CPPTYPE_UINT64: " << type;
7727       break;
7728   }
7729 }
7730 
LogUnusedDependency(const FileDescriptorProto & proto,const FileDescriptor * result)7731 void DescriptorBuilder::LogUnusedDependency(const FileDescriptorProto& proto,
7732                                             const FileDescriptor* result) {
7733 
7734   if (!unused_dependency_.empty()) {
7735     auto itr = pool_->unused_import_track_files_.find(proto.name());
7736     bool is_error =
7737         itr != pool_->unused_import_track_files_.end() && itr->second;
7738     for (std::set<const FileDescriptor*>::const_iterator it =
7739              unused_dependency_.begin();
7740          it != unused_dependency_.end(); ++it) {
7741       std::string error_message = "Import " + (*it)->name() + " is unused.";
7742       if (is_error) {
7743         AddError((*it)->name(), proto, DescriptorPool::ErrorCollector::IMPORT,
7744                  error_message);
7745       } else {
7746         AddWarning((*it)->name(), proto, DescriptorPool::ErrorCollector::IMPORT,
7747                    error_message);
7748       }
7749     }
7750   }
7751 }
7752 
CrossLinkOnDemandHelper(StringPiece name,bool expecting_enum) const7753 Symbol DescriptorPool::CrossLinkOnDemandHelper(StringPiece name,
7754                                                bool expecting_enum) const {
7755   auto lookup_name = std::string(name);
7756   if (!lookup_name.empty() && lookup_name[0] == '.') {
7757     lookup_name = lookup_name.substr(1);
7758   }
7759   Symbol result = tables_->FindByNameHelper(this, lookup_name);
7760   return result;
7761 }
7762 
7763 // Handle the lazy import building for a message field whose type wasn't built
7764 // at cross link time. If that was the case, we saved the name of the type to
7765 // be looked up when the accessor for the type was called. Set type_,
7766 // enum_type_, message_type_, and default_value_enum_ appropriately.
InternalTypeOnceInit() const7767 void FieldDescriptor::InternalTypeOnceInit() const {
7768   GOOGLE_CHECK(file()->finished_building_ == true);
7769   const EnumDescriptor* enum_type = nullptr;
7770   if (type_once_->field.type_name) {
7771     Symbol result = file()->pool()->CrossLinkOnDemandHelper(
7772         *type_once_->field.type_name, type_ == FieldDescriptor::TYPE_ENUM);
7773     if (result.type() == Symbol::MESSAGE) {
7774       type_ = FieldDescriptor::TYPE_MESSAGE;
7775       type_descriptor_.message_type = result.descriptor();
7776     } else if (result.type() == Symbol::ENUM) {
7777       type_ = FieldDescriptor::TYPE_ENUM;
7778       enum_type = type_descriptor_.enum_type = result.enum_descriptor();
7779     }
7780   }
7781   if (enum_type && !default_value_enum_) {
7782     if (type_once_->field.default_value_enum_name) {
7783       // Have to build the full name now instead of at CrossLink time,
7784       // because enum_type may not be known at the time.
7785       std::string name = enum_type->full_name();
7786       // Enum values reside in the same scope as the enum type.
7787       std::string::size_type last_dot = name.find_last_of('.');
7788       if (last_dot != std::string::npos) {
7789         name = name.substr(0, last_dot) + "." +
7790                *type_once_->field.default_value_enum_name;
7791       } else {
7792         name = *type_once_->field.default_value_enum_name;
7793       }
7794       Symbol result = file()->pool()->CrossLinkOnDemandHelper(name, true);
7795       default_value_enum_ = result.enum_value_descriptor();
7796     }
7797     if (!default_value_enum_) {
7798       // We use the first defined value as the default
7799       // if a default is not explicitly defined.
7800       GOOGLE_CHECK(enum_type->value_count());
7801       default_value_enum_ = enum_type->value(0);
7802     }
7803   }
7804 }
7805 
TypeOnceInit(const FieldDescriptor * to_init)7806 void FieldDescriptor::TypeOnceInit(const FieldDescriptor* to_init) {
7807   to_init->InternalTypeOnceInit();
7808 }
7809 
7810 // message_type(), enum_type(), default_value_enum(), and type()
7811 // all share the same internal::call_once init path to do lazy
7812 // import building and cross linking of a field of a message.
message_type() const7813 const Descriptor* FieldDescriptor::message_type() const {
7814   if (type_once_) {
7815     internal::call_once(type_once_->once, FieldDescriptor::TypeOnceInit, this);
7816   }
7817   return type_ == TYPE_MESSAGE || type_ == TYPE_GROUP
7818              ? type_descriptor_.message_type
7819              : nullptr;
7820 }
7821 
enum_type() const7822 const EnumDescriptor* FieldDescriptor::enum_type() const {
7823   if (type_once_) {
7824     internal::call_once(type_once_->once, FieldDescriptor::TypeOnceInit, this);
7825   }
7826   return type_ == TYPE_ENUM ? type_descriptor_.enum_type : nullptr;
7827 }
7828 
default_value_enum() const7829 const EnumValueDescriptor* FieldDescriptor::default_value_enum() const {
7830   if (type_once_) {
7831     internal::call_once(type_once_->once, FieldDescriptor::TypeOnceInit, this);
7832   }
7833   return default_value_enum_;
7834 }
7835 
PrintableNameForExtension() const7836 const std::string& FieldDescriptor::PrintableNameForExtension() const {
7837   const bool is_message_set_extension =
7838       is_extension() &&
7839       containing_type()->options().message_set_wire_format() &&
7840       type() == FieldDescriptor::TYPE_MESSAGE && is_optional() &&
7841       extension_scope() == message_type();
7842   return is_message_set_extension ? message_type()->full_name() : full_name();
7843 }
7844 
InternalDependenciesOnceInit() const7845 void FileDescriptor::InternalDependenciesOnceInit() const {
7846   GOOGLE_CHECK(finished_building_ == true);
7847   auto* names = dependencies_once_->file.dependencies_names;
7848   for (int i = 0; i < dependency_count(); i++) {
7849     if (names[i]) {
7850       dependencies_[i] = pool_->FindFileByName(*names[i]);
7851     }
7852   }
7853 }
7854 
DependenciesOnceInit(const FileDescriptor * to_init)7855 void FileDescriptor::DependenciesOnceInit(const FileDescriptor* to_init) {
7856   to_init->InternalDependenciesOnceInit();
7857 }
7858 
dependency(int index) const7859 const FileDescriptor* FileDescriptor::dependency(int index) const {
7860   if (dependencies_once_) {
7861     // Do once init for all indices, as it's unlikely only a single index would
7862     // be called, and saves on internal::call_once allocations.
7863     internal::call_once(dependencies_once_->once,
7864                         FileDescriptor::DependenciesOnceInit, this);
7865   }
7866   return dependencies_[index];
7867 }
7868 
input_type() const7869 const Descriptor* MethodDescriptor::input_type() const {
7870   return input_type_.Get();
7871 }
7872 
output_type() const7873 const Descriptor* MethodDescriptor::output_type() const {
7874   return output_type_.Get();
7875 }
7876 
7877 
7878 namespace internal {
Set(const Descriptor * descriptor)7879 void LazyDescriptor::Set(const Descriptor* descriptor) {
7880   GOOGLE_CHECK(!once_);
7881   descriptor_ = descriptor;
7882 }
7883 
SetLazy(StringPiece name,const FileDescriptor * file)7884 void LazyDescriptor::SetLazy(StringPiece name,
7885                              const FileDescriptor* file) {
7886   // verify Init() has been called and Set hasn't been called yet.
7887   GOOGLE_CHECK(!descriptor_);
7888   GOOGLE_CHECK(!once_);
7889   GOOGLE_CHECK(file && file->pool_);
7890   GOOGLE_CHECK(file->pool_->lazily_build_dependencies_);
7891   GOOGLE_CHECK(!file->finished_building_);
7892   once_ = file->pool_->tables_->AllocateLazyInit();
7893   once_->descriptor.file = file;
7894   once_->descriptor.name = file->pool_->tables_->AllocateString(name);
7895 }
7896 
Once()7897 void LazyDescriptor::Once() {
7898   if (once_) {
7899     internal::call_once(once_->once, LazyDescriptor::OnceStatic, this);
7900   }
7901 }
7902 
OnceStatic(LazyDescriptor * lazy)7903 void LazyDescriptor::OnceStatic(LazyDescriptor* lazy) { lazy->OnceInternal(); }
7904 
OnceInternal()7905 void LazyDescriptor::OnceInternal() {
7906   auto* file = once_->descriptor.file;
7907   auto* name = once_->descriptor.name;
7908   GOOGLE_CHECK(file->finished_building_);
7909   if (!descriptor_ && name) {
7910     auto* descriptor =
7911         file->pool_->CrossLinkOnDemandHelper(*name, false).descriptor();
7912     if (descriptor != nullptr) {
7913       descriptor_ = descriptor;
7914     }
7915   }
7916 }
7917 }  // namespace internal
7918 
7919 }  // namespace protobuf
7920 }  // namespace google
7921 
7922 #include <google/protobuf/port_undef.inc>
7923