1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
36 
37 #include <cstdint>
38 #include <functional>
39 #include <limits>
40 #include <map>
41 #include <queue>
42 #include <unordered_set>
43 #include <vector>
44 
45 #include <google/protobuf/stubs/common.h>
46 #include <google/protobuf/stubs/logging.h>
47 #include <google/protobuf/compiler/cpp/cpp_options.h>
48 #include <google/protobuf/compiler/cpp/cpp_names.h>
49 #include <google/protobuf/descriptor.pb.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/compiler/scc.h>
52 #include <google/protobuf/io/printer.h>
53 #include <google/protobuf/io/zero_copy_stream.h>
54 #include <google/protobuf/dynamic_message.h>
55 #include <google/protobuf/wire_format.h>
56 #include <google/protobuf/wire_format_lite.h>
57 #include <google/protobuf/stubs/strutil.h>
58 #include <google/protobuf/stubs/substitute.h>
59 #include <google/protobuf/stubs/hash.h>
60 
61 #include <google/protobuf/port_def.inc>
62 
63 namespace google {
64 namespace protobuf {
65 namespace compiler {
66 namespace cpp {
67 
68 namespace {
69 
// Simple (unqualified) name of the `Any` message type.
static const char kAnyMessageName[] = "Any";
// Path of the .proto file named any.proto under google/protobuf.  Presumably
// used together with kAnyMessageName to recognize google.protobuf.Any; the
// users of these constants are outside this part of the file.
static const char kAnyProtoFile[] = "google/protobuf/any.proto";
72 
DotsToColons(const std::string & name)73 std::string DotsToColons(const std::string& name) {
74   return StringReplace(name, ".", "::", true);
75 }
76 
// Reserved words that must not be emitted verbatim as identifiers: "NULL"
// plus C++ keywords and alternative operator spellings.  The helpers below
// (e.g. ResolveKeyword()) append an underscore to names found in this list.
static const char* const kKeywordList[] = {  //
    "NULL",
    "alignas",
    "alignof",
    "and",
    "and_eq",
    "asm",
    "auto",
    "bitand",
    "bitor",
    "bool",
    "break",
    "case",
    "catch",
    "char",
    "class",
    "compl",
    "const",
    "constexpr",
    "const_cast",
    "continue",
    "decltype",
    "default",
    "delete",
    "do",
    "double",
    "dynamic_cast",
    "else",
    "enum",
    "explicit",
    "export",
    "extern",
    "false",
    "float",
    "for",
    "friend",
    "goto",
    "if",
    "inline",
    "int",
    "long",
    "mutable",
    "namespace",
    "new",
    "noexcept",
    "not",
    "not_eq",
    "nullptr",
    "operator",
    "or",
    "or_eq",
    "private",
    "protected",
    "public",
    "register",
    "reinterpret_cast",
    "return",
    "short",
    "signed",
    "sizeof",
    "static",
    "static_assert",
    "static_cast",
    "struct",
    "switch",
    "template",
    "this",
    "thread_local",
    "throw",
    "true",
    "try",
    "typedef",
    "typeid",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "wchar_t",
    "while",
    "xor",
    "xor_eq"};
161 
MakeKeywordsMap()162 static std::unordered_set<std::string>* MakeKeywordsMap() {
163   auto* result = new std::unordered_set<std::string>();
164   for (const auto keyword : kKeywordList) {
165     result->emplace(keyword);
166   }
167   return result;
168 }
169 
// Reserved-word set consulted when escaping generated identifiers.  Bound to
// the heap object created by MakeKeywordsMap(), which is never deleted in
// this file (presumably on purpose, so the set outlives static destruction).
static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
171 
172 // Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
Base63Char(int value)173 char Base63Char(int value) {
174   GOOGLE_CHECK_GE(value, 0);
175   if (value < 26) return 'A' + value;
176   value -= 26;
177   if (value < 26) return 'a' + value;
178   value -= 26;
179   if (value < 10) return '0' + value;
180   GOOGLE_CHECK_EQ(value, 10);
181   return '_';
182 }
183 
184 // Given a c identifier has 63 legal characters we can't implement base64
185 // encoding. So we return the k least significant "digits" in base 63.
186 template <typename I>
Base63(I n,int k)187 std::string Base63(I n, int k) {
188   std::string res;
189   while (k-- > 0) {
190     res += Base63Char(static_cast<int>(n % 63));
191     n /= 63;
192   }
193   return res;
194 }
195 
IntTypeName(const Options & options,const std::string & type)196 std::string IntTypeName(const Options& options, const std::string& type) {
197   if (options.opensource_runtime) {
198     return "::PROTOBUF_NAMESPACE_ID::" + type;
199   } else {
200     return "::" + type;
201   }
202 }
203 
SetIntVar(const Options & options,const std::string & type,std::map<std::string,std::string> * variables)204 void SetIntVar(const Options& options, const std::string& type,
205                std::map<std::string, std::string>* variables) {
206   (*variables)[type] = IntTypeName(options, type);
207 }
// Stub implementation: no field is ever reported as eagerly-verified lazy, so
// all three arguments are ignored and the result is always false.
bool IsEagerlyVerifiedLazyImpl(const FieldDescriptor* field,
                               const Options& options,
                               MessageSCCAnalyzer* scc_analyzer) {
  const bool eagerly_verified_lazy = false;
  return eagerly_verified_lazy;
}
213 
214 }  // namespace
215 
IsLazy(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)216 bool IsLazy(const FieldDescriptor* field, const Options& options,
217             MessageSCCAnalyzer* scc_analyzer) {
218   return IsLazilyVerifiedLazy(field, options) ||
219          IsEagerlyVerifiedLazyImpl(field, options, scc_analyzer);
220 }
221 
SetCommonVars(const Options & options,std::map<std::string,std::string> * variables)222 void SetCommonVars(const Options& options,
223                    std::map<std::string, std::string>* variables) {
224   (*variables)["proto_ns"] = ProtobufNamespace(options);
225 
226   // Warning: there is some clever naming/splitting here to avoid extract script
227   // rewrites.  The names of these variables must not be things that the extract
228   // script will rewrite.  That's why we use "CHK" (for example) instead of
229   // "GOOGLE_CHECK".
230   if (options.opensource_runtime) {
231     (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
232     (*variables)["CHK"] = "GOOGLE_CHECK";
233     (*variables)["DCHK"] = "GOOGLE_DCHECK";
234   } else {
235     // These values are things the extract script would rewrite if we did not
236     // split them.  It might not strictly matter since we don't generate google3
237     // code in open-source.  But it's good to prevent surprising things from
238     // happening.
239     (*variables)["GOOGLE_PROTOBUF"] =
240         "GOOGLE3"
241         "_PROTOBUF";
242     (*variables)["CHK"] =
243         "CH"
244         "ECK";
245     (*variables)["DCHK"] =
246         "DCH"
247         "ECK";
248   }
249 
250   SetIntVar(options, "int8", variables);
251   SetIntVar(options, "uint8", variables);
252   SetIntVar(options, "uint32", variables);
253   SetIntVar(options, "uint64", variables);
254   SetIntVar(options, "int32", variables);
255   SetIntVar(options, "int64", variables);
256   (*variables)["string"] = "std::string";
257 }
258 
SetUnknownFieldsVariable(const Descriptor * descriptor,const Options & options,std::map<std::string,std::string> * variables)259 void SetUnknownFieldsVariable(const Descriptor* descriptor,
260                               const Options& options,
261                               std::map<std::string, std::string>* variables) {
262   std::string proto_ns = ProtobufNamespace(options);
263   std::string unknown_fields_type;
264   if (UseUnknownFieldSet(descriptor->file(), options)) {
265     unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
266     (*variables)["unknown_fields"] =
267         "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
268         unknown_fields_type + "::default_instance)";
269   } else {
270     unknown_fields_type =
271         PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
272     (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
273                                      unknown_fields_type + ">(::" + proto_ns +
274                                      "::internal::GetEmptyString)";
275   }
276   (*variables)["unknown_fields_type"] = unknown_fields_type;
277   (*variables)["have_unknown_fields"] =
278       "_internal_metadata_.have_unknown_fields()";
279   (*variables)["mutable_unknown_fields"] =
280       "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
281       ">()";
282 }
283 
// Converts `input` to camel case.
//
// ASCII letters and digits are kept; every other character (underscores
// included) is dropped and forces the next lowercase letter to be
// capitalized.  A digit also capitalizes the letter that follows it.
// Uppercase letters are copied unchanged.  `cap_next_letter` selects whether
// the first letter is capitalized.
//
// Examples: ("foo_bar", true) -> "FooBar"; ("foo_bar", false) -> "fooBar";
//           ("foo1bar", false) -> "foo1Bar".
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  // The output is never longer than the input.
  result.reserve(input.size());
  // Note:  ctype.h is deliberately avoided because its behavior depends on
  // the locale; plain ASCII range checks are used instead.
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else {
      // Digits are kept, anything else is dropped; both start a new "word".
      if ('0' <= c && c <= '9') result += c;
      cap_next_letter = true;
    }
  }
  return result;
}
309 
// Comment lines made of '=' (thick) and '-' (thin) characters, each ending
// with a newline.  Presumably emitted into generated code as visual section
// dividers; the emitting code is outside this part of the file.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
314 
CanInitializeByZeroing(const FieldDescriptor * field)315 bool CanInitializeByZeroing(const FieldDescriptor* field) {
316   if (field->is_repeated() || field->is_extension()) return false;
317   switch (field->cpp_type()) {
318     case FieldDescriptor::CPPTYPE_ENUM:
319       return field->default_value_enum()->number() == 0;
320     case FieldDescriptor::CPPTYPE_INT32:
321       return field->default_value_int32() == 0;
322     case FieldDescriptor::CPPTYPE_INT64:
323       return field->default_value_int64() == 0;
324     case FieldDescriptor::CPPTYPE_UINT32:
325       return field->default_value_uint32() == 0;
326     case FieldDescriptor::CPPTYPE_UINT64:
327       return field->default_value_uint64() == 0;
328     case FieldDescriptor::CPPTYPE_FLOAT:
329       return field->default_value_float() == 0;
330     case FieldDescriptor::CPPTYPE_DOUBLE:
331       return field->default_value_double() == 0;
332     case FieldDescriptor::CPPTYPE_BOOL:
333       return field->default_value_bool() == false;
334     default:
335       return false;
336   }
337 }
338 
ClassName(const Descriptor * descriptor)339 std::string ClassName(const Descriptor* descriptor) {
340   const Descriptor* parent = descriptor->containing_type();
341   std::string res;
342   if (parent) res += ClassName(parent) + "_";
343   res += descriptor->name();
344   if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
345   return ResolveKeyword(res);
346 }
347 
ClassName(const EnumDescriptor * enum_descriptor)348 std::string ClassName(const EnumDescriptor* enum_descriptor) {
349   if (enum_descriptor->containing_type() == nullptr) {
350     return ResolveKeyword(enum_descriptor->name());
351   } else {
352     return ClassName(enum_descriptor->containing_type()) + "_" +
353            enum_descriptor->name();
354   }
355 }
356 
QualifiedClassName(const Descriptor * d,const Options & options)357 std::string QualifiedClassName(const Descriptor* d, const Options& options) {
358   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
359 }
360 
QualifiedClassName(const EnumDescriptor * d,const Options & options)361 std::string QualifiedClassName(const EnumDescriptor* d,
362                                const Options& options) {
363   return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
364 }
365 
QualifiedClassName(const Descriptor * d)366 std::string QualifiedClassName(const Descriptor* d) {
367   return QualifiedClassName(d, Options());
368 }
369 
QualifiedClassName(const EnumDescriptor * d)370 std::string QualifiedClassName(const EnumDescriptor* d) {
371   return QualifiedClassName(d, Options());
372 }
373 
ExtensionName(const FieldDescriptor * d)374 std::string ExtensionName(const FieldDescriptor* d) {
375   if (const Descriptor* scope = d->extension_scope())
376     return StrCat(ClassName(scope), "::", ResolveKeyword(d->name()));
377   return ResolveKeyword(d->name());
378 }
379 
QualifiedExtensionName(const FieldDescriptor * d,const Options & options)380 std::string QualifiedExtensionName(const FieldDescriptor* d,
381                                    const Options& options) {
382   GOOGLE_DCHECK(d->is_extension());
383   return QualifiedFileLevelSymbol(d->file(), ExtensionName(d), options);
384 }
385 
QualifiedExtensionName(const FieldDescriptor * d)386 std::string QualifiedExtensionName(const FieldDescriptor* d) {
387   return QualifiedExtensionName(d, Options());
388 }
389 
Namespace(const std::string & package)390 std::string Namespace(const std::string& package) {
391   if (package.empty()) return "";
392   return "::" + DotsToColons(package);
393 }
394 
Namespace(const FileDescriptor * d,const Options & options)395 std::string Namespace(const FileDescriptor* d, const Options& options) {
396   std::string ret = Namespace(d->package());
397   if (IsWellKnownMessage(d) && options.opensource_runtime) {
398     // Written with string concatenation to prevent rewriting of
399     // ::google::protobuf.
400     ret = StringReplace(ret,
401                         "::google::"
402                         "protobuf",
403                         "PROTOBUF_NAMESPACE_ID", false);
404   }
405   return ret;
406 }
407 
Namespace(const Descriptor * d,const Options & options)408 std::string Namespace(const Descriptor* d, const Options& options) {
409   return Namespace(d->file(), options);
410 }
411 
Namespace(const FieldDescriptor * d,const Options & options)412 std::string Namespace(const FieldDescriptor* d, const Options& options) {
413   return Namespace(d->file(), options);
414 }
415 
Namespace(const EnumDescriptor * d,const Options & options)416 std::string Namespace(const EnumDescriptor* d, const Options& options) {
417   return Namespace(d->file(), options);
418 }
419 
DefaultInstanceType(const Descriptor * descriptor,const Options & options)420 std::string DefaultInstanceType(const Descriptor* descriptor,
421                                 const Options& options) {
422   return ClassName(descriptor) + "DefaultTypeInternal";
423 }
424 
DefaultInstanceName(const Descriptor * descriptor,const Options & options)425 std::string DefaultInstanceName(const Descriptor* descriptor,
426                                 const Options& options) {
427   return "_" + ClassName(descriptor, false) + "_default_instance_";
428 }
429 
DefaultInstancePtr(const Descriptor * descriptor,const Options & options)430 std::string DefaultInstancePtr(const Descriptor* descriptor,
431                                const Options& options) {
432   return DefaultInstanceName(descriptor, options) + "ptr_";
433 }
434 
QualifiedDefaultInstanceName(const Descriptor * descriptor,const Options & options)435 std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
436                                          const Options& options) {
437   return QualifiedFileLevelSymbol(
438       descriptor->file(), DefaultInstanceName(descriptor, options), options);
439 }
440 
QualifiedDefaultInstancePtr(const Descriptor * descriptor,const Options & options)441 std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
442                                         const Options& options) {
443   return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
444 }
445 
DescriptorTableName(const FileDescriptor * file,const Options & options)446 std::string DescriptorTableName(const FileDescriptor* file,
447                                 const Options& options) {
448   return UniqueName("descriptor_table", file, options);
449 }
450 
FileDllExport(const FileDescriptor * file,const Options & options)451 std::string FileDllExport(const FileDescriptor* file, const Options& options) {
452   return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
453 }
454 
SuperClassName(const Descriptor * descriptor,const Options & options)455 std::string SuperClassName(const Descriptor* descriptor,
456                            const Options& options) {
457   return "::" + ProtobufNamespace(options) +
458          (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
459                                                             : "::MessageLite");
460 }
461 
ResolveKeyword(const std::string & name)462 std::string ResolveKeyword(const std::string& name) {
463   if (kKeywords.count(name) > 0) {
464     return name + "_";
465   }
466   return name;
467 }
468 
FieldName(const FieldDescriptor * field)469 std::string FieldName(const FieldDescriptor* field) {
470   std::string result = field->name();
471   LowerString(&result);
472   if (kKeywords.count(result) > 0) {
473     result.append("_");
474   }
475   return result;
476 }
477 
EnumValueName(const EnumValueDescriptor * enum_value)478 std::string EnumValueName(const EnumValueDescriptor* enum_value) {
479   std::string result = enum_value->name();
480   if (kKeywords.count(result) > 0) {
481     result.append("_");
482   }
483   return result;
484 }
485 
EstimateAlignmentSize(const FieldDescriptor * field)486 int EstimateAlignmentSize(const FieldDescriptor* field) {
487   if (field == nullptr) return 0;
488   if (field->is_repeated()) return 8;
489   switch (field->cpp_type()) {
490     case FieldDescriptor::CPPTYPE_BOOL:
491       return 1;
492 
493     case FieldDescriptor::CPPTYPE_INT32:
494     case FieldDescriptor::CPPTYPE_UINT32:
495     case FieldDescriptor::CPPTYPE_ENUM:
496     case FieldDescriptor::CPPTYPE_FLOAT:
497       return 4;
498 
499     case FieldDescriptor::CPPTYPE_INT64:
500     case FieldDescriptor::CPPTYPE_UINT64:
501     case FieldDescriptor::CPPTYPE_DOUBLE:
502     case FieldDescriptor::CPPTYPE_STRING:
503     case FieldDescriptor::CPPTYPE_MESSAGE:
504       return 8;
505   }
506   GOOGLE_LOG(FATAL) << "Can't get here.";
507   return -1;  // Make compiler happy.
508 }
509 
FieldConstantName(const FieldDescriptor * field)510 std::string FieldConstantName(const FieldDescriptor* field) {
511   std::string field_name = UnderscoresToCamelCase(field->name(), true);
512   std::string result = "k" + field_name + "FieldNumber";
513 
514   if (!field->is_extension() &&
515       field->containing_type()->FindFieldByCamelcaseName(
516           field->camelcase_name()) != field) {
517     // This field's camelcase name is not unique.  As a hack, add the field
518     // number to the constant name.  This makes the constant rather useless,
519     // but what can we do?
520     result += "_" + StrCat(field->number());
521   }
522 
523   return result;
524 }
525 
FieldMessageTypeName(const FieldDescriptor * field,const Options & options)526 std::string FieldMessageTypeName(const FieldDescriptor* field,
527                                  const Options& options) {
528   // Note:  The Google-internal version of Protocol Buffers uses this function
529   //   as a hook point for hacks to support legacy code.
530   return QualifiedClassName(field->message_type(), options);
531 }
532 
StripProto(const std::string & filename)533 std::string StripProto(const std::string& filename) {
534   /*
535    * TODO(github/georgthegreat) remove this proxy method
536    * once Google's internal codebase will become ready
537    */
538   return compiler::StripProto(filename);
539 }
540 
PrimitiveTypeName(FieldDescriptor::CppType type)541 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
542   switch (type) {
543     case FieldDescriptor::CPPTYPE_INT32:
544       return "::google::protobuf::int32";
545     case FieldDescriptor::CPPTYPE_INT64:
546       return "::google::protobuf::int64";
547     case FieldDescriptor::CPPTYPE_UINT32:
548       return "::google::protobuf::uint32";
549     case FieldDescriptor::CPPTYPE_UINT64:
550       return "::google::protobuf::uint64";
551     case FieldDescriptor::CPPTYPE_DOUBLE:
552       return "double";
553     case FieldDescriptor::CPPTYPE_FLOAT:
554       return "float";
555     case FieldDescriptor::CPPTYPE_BOOL:
556       return "bool";
557     case FieldDescriptor::CPPTYPE_ENUM:
558       return "int";
559     case FieldDescriptor::CPPTYPE_STRING:
560       return "std::string";
561     case FieldDescriptor::CPPTYPE_MESSAGE:
562       return nullptr;
563 
564       // No default because we want the compiler to complain if any new
565       // CppTypes are added.
566   }
567 
568   GOOGLE_LOG(FATAL) << "Can't get here.";
569   return nullptr;
570 }
571 
PrimitiveTypeName(const Options & options,FieldDescriptor::CppType type)572 std::string PrimitiveTypeName(const Options& options,
573                               FieldDescriptor::CppType type) {
574   switch (type) {
575     case FieldDescriptor::CPPTYPE_INT32:
576       return IntTypeName(options, "int32");
577     case FieldDescriptor::CPPTYPE_INT64:
578       return IntTypeName(options, "int64");
579     case FieldDescriptor::CPPTYPE_UINT32:
580       return IntTypeName(options, "uint32");
581     case FieldDescriptor::CPPTYPE_UINT64:
582       return IntTypeName(options, "uint64");
583     case FieldDescriptor::CPPTYPE_DOUBLE:
584       return "double";
585     case FieldDescriptor::CPPTYPE_FLOAT:
586       return "float";
587     case FieldDescriptor::CPPTYPE_BOOL:
588       return "bool";
589     case FieldDescriptor::CPPTYPE_ENUM:
590       return "int";
591     case FieldDescriptor::CPPTYPE_STRING:
592       return "std::string";
593     case FieldDescriptor::CPPTYPE_MESSAGE:
594       return "";
595 
596       // No default because we want the compiler to complain if any new
597       // CppTypes are added.
598   }
599 
600   GOOGLE_LOG(FATAL) << "Can't get here.";
601   return "";
602 }
603 
DeclaredTypeMethodName(FieldDescriptor::Type type)604 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
605   switch (type) {
606     case FieldDescriptor::TYPE_INT32:
607       return "Int32";
608     case FieldDescriptor::TYPE_INT64:
609       return "Int64";
610     case FieldDescriptor::TYPE_UINT32:
611       return "UInt32";
612     case FieldDescriptor::TYPE_UINT64:
613       return "UInt64";
614     case FieldDescriptor::TYPE_SINT32:
615       return "SInt32";
616     case FieldDescriptor::TYPE_SINT64:
617       return "SInt64";
618     case FieldDescriptor::TYPE_FIXED32:
619       return "Fixed32";
620     case FieldDescriptor::TYPE_FIXED64:
621       return "Fixed64";
622     case FieldDescriptor::TYPE_SFIXED32:
623       return "SFixed32";
624     case FieldDescriptor::TYPE_SFIXED64:
625       return "SFixed64";
626     case FieldDescriptor::TYPE_FLOAT:
627       return "Float";
628     case FieldDescriptor::TYPE_DOUBLE:
629       return "Double";
630 
631     case FieldDescriptor::TYPE_BOOL:
632       return "Bool";
633     case FieldDescriptor::TYPE_ENUM:
634       return "Enum";
635 
636     case FieldDescriptor::TYPE_STRING:
637       return "String";
638     case FieldDescriptor::TYPE_BYTES:
639       return "Bytes";
640     case FieldDescriptor::TYPE_GROUP:
641       return "Group";
642     case FieldDescriptor::TYPE_MESSAGE:
643       return "Message";
644 
645       // No default because we want the compiler to complain if any new
646       // types are added.
647   }
648   GOOGLE_LOG(FATAL) << "Can't get here.";
649   return "";
650 }
651 
Int32ToString(int number)652 std::string Int32ToString(int number) {
653   if (number == std::numeric_limits<int32_t>::min()) {
654     // This needs to be special-cased, see explanation here:
655     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
656     return StrCat(number + 1, " - 1");
657   } else {
658     return StrCat(number);
659   }
660 }
661 
Int64ToString(int64_t number)662 static std::string Int64ToString(int64_t number) {
663   if (number == std::numeric_limits<int64_t>::min()) {
664     // This needs to be special-cased, see explanation here:
665     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
666     return StrCat("int64_t{", number + 1, "} - 1");
667   }
668   return StrCat("int64_t{", number, "}");
669 }
670 
UInt64ToString(uint64_t number)671 static std::string UInt64ToString(uint64_t number) {
672   return StrCat("uint64_t{", number, "u}");
673 }
674 
DefaultValue(const FieldDescriptor * field)675 std::string DefaultValue(const FieldDescriptor* field) {
676   return DefaultValue(Options(), field);
677 }
678 
DefaultValue(const Options & options,const FieldDescriptor * field)679 std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
680   switch (field->cpp_type()) {
681     case FieldDescriptor::CPPTYPE_INT32:
682       return Int32ToString(field->default_value_int32());
683     case FieldDescriptor::CPPTYPE_UINT32:
684       return StrCat(field->default_value_uint32()) + "u";
685     case FieldDescriptor::CPPTYPE_INT64:
686       return Int64ToString(field->default_value_int64());
687     case FieldDescriptor::CPPTYPE_UINT64:
688       return UInt64ToString(field->default_value_uint64());
689     case FieldDescriptor::CPPTYPE_DOUBLE: {
690       double value = field->default_value_double();
691       if (value == std::numeric_limits<double>::infinity()) {
692         return "std::numeric_limits<double>::infinity()";
693       } else if (value == -std::numeric_limits<double>::infinity()) {
694         return "-std::numeric_limits<double>::infinity()";
695       } else if (value != value) {
696         return "std::numeric_limits<double>::quiet_NaN()";
697       } else {
698         return SimpleDtoa(value);
699       }
700     }
701     case FieldDescriptor::CPPTYPE_FLOAT: {
702       float value = field->default_value_float();
703       if (value == std::numeric_limits<float>::infinity()) {
704         return "std::numeric_limits<float>::infinity()";
705       } else if (value == -std::numeric_limits<float>::infinity()) {
706         return "-std::numeric_limits<float>::infinity()";
707       } else if (value != value) {
708         return "std::numeric_limits<float>::quiet_NaN()";
709       } else {
710         std::string float_value = SimpleFtoa(value);
711         // If floating point value contains a period (.) or an exponent
712         // (either E or e), then append suffix 'f' to make it a float
713         // literal.
714         if (float_value.find_first_of(".eE") != std::string::npos) {
715           float_value.push_back('f');
716         }
717         return float_value;
718       }
719     }
720     case FieldDescriptor::CPPTYPE_BOOL:
721       return field->default_value_bool() ? "true" : "false";
722     case FieldDescriptor::CPPTYPE_ENUM:
723       // Lazy:  Generate a static_cast because we don't have a helper function
724       //   that constructs the full name of an enum value.
725       return strings::Substitute(
726           "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
727           Int32ToString(field->default_value_enum()->number()));
728     case FieldDescriptor::CPPTYPE_STRING:
729       return "\"" +
730              EscapeTrigraphs(CEscape(field->default_value_string())) +
731              "\"";
732     case FieldDescriptor::CPPTYPE_MESSAGE:
733       return "*" + FieldMessageTypeName(field, options) +
734              "::internal_default_instance()";
735   }
736   // Can't actually get here; make compiler happy.  (We could add a default
737   // case above but then we wouldn't get the nice compiler warning when a
738   // new type is added.)
739   GOOGLE_LOG(FATAL) << "Can't get here.";
740   return "";
741 }
742 
743 // Convert a file name into a valid identifier.
FilenameIdentifier(const std::string & filename)744 std::string FilenameIdentifier(const std::string& filename) {
745   std::string result;
746   for (int i = 0; i < filename.size(); i++) {
747     if (ascii_isalnum(filename[i])) {
748       result.push_back(filename[i]);
749     } else {
750       // Not alphanumeric.  To avoid any possibility of name conflicts we
751       // use the hex code for the character.
752       StrAppend(&result, "_",
753                       strings::Hex(static_cast<uint8_t>(filename[i])));
754     }
755   }
756   return result;
757 }
758 
UniqueName(const std::string & name,const std::string & filename,const Options & options)759 std::string UniqueName(const std::string& name, const std::string& filename,
760                        const Options& options) {
761   return name + "_" + FilenameIdentifier(filename);
762 }
763 
764 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const FileDescriptor * file,const std::string & name,const Options & options)765 std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
766                                      const std::string& name,
767                                      const Options& options) {
768   if (file->package().empty()) {
769     return StrCat("::", name);
770   }
771   return StrCat(Namespace(file, options), "::", name);
772 }
773 
774 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const std::string & to_escape)775 std::string EscapeTrigraphs(const std::string& to_escape) {
776   return StringReplace(to_escape, "?", "\\?", true);
777 }
778 
779 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const std::string & prefix)780 std::string SafeFunctionName(const Descriptor* descriptor,
781                              const FieldDescriptor* field,
782                              const std::string& prefix) {
783   // Do not use FieldName() since it will escape keywords.
784   std::string name = field->name();
785   LowerString(&name);
786   std::string function_name = prefix + name;
787   if (descriptor->FindFieldByName(function_name)) {
788     // Single underscore will also make it conflicting with the private data
789     // member. We use double underscore to escape function names.
790     function_name.append("__");
791   } else if (kKeywords.count(name) > 0) {
792     // If the field name is a keyword, we append the underscore back to keep it
793     // consistent with other function names.
794     function_name.append("_");
795   }
796   return function_name;
797 }
798 
HasLazyFields(const Descriptor * descriptor,const Options & options,MessageSCCAnalyzer * scc_analyzer)799 static bool HasLazyFields(const Descriptor* descriptor, const Options& options,
800                           MessageSCCAnalyzer* scc_analyzer) {
801   for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
802     if (IsLazy(descriptor->field(field_idx), options, scc_analyzer)) {
803       return true;
804     }
805   }
806   for (int idx = 0; idx < descriptor->extension_count(); idx++) {
807     if (IsLazy(descriptor->extension(idx), options, scc_analyzer)) {
808       return true;
809     }
810   }
811   for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
812     if (HasLazyFields(descriptor->nested_type(idx), options, scc_analyzer)) {
813       return true;
814     }
815   }
816   return false;
817 }
818 
819 // Does the given FileDescriptor use lazy fields?
HasLazyFields(const FileDescriptor * file,const Options & options,MessageSCCAnalyzer * scc_analyzer)820 bool HasLazyFields(const FileDescriptor* file, const Options& options,
821                    MessageSCCAnalyzer* scc_analyzer) {
822   for (int i = 0; i < file->message_type_count(); i++) {
823     const Descriptor* descriptor(file->message_type(i));
824     if (HasLazyFields(descriptor, options, scc_analyzer)) {
825       return true;
826     }
827   }
828   for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
829     if (IsLazy(file->extension(field_idx), options, scc_analyzer)) {
830       return true;
831     }
832   }
833   return false;
834 }
835 
HasRepeatedFields(const Descriptor * descriptor)836 static bool HasRepeatedFields(const Descriptor* descriptor) {
837   for (int i = 0; i < descriptor->field_count(); ++i) {
838     if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
839       return true;
840     }
841   }
842   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
843     if (HasRepeatedFields(descriptor->nested_type(i))) return true;
844   }
845   return false;
846 }
847 
HasRepeatedFields(const FileDescriptor * file)848 bool HasRepeatedFields(const FileDescriptor* file) {
849   for (int i = 0; i < file->message_type_count(); ++i) {
850     if (HasRepeatedFields(file->message_type(i))) return true;
851   }
852   return false;
853 }
854 
IsStringPieceField(const FieldDescriptor * field,const Options & options)855 static bool IsStringPieceField(const FieldDescriptor* field,
856                                const Options& options) {
857   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
858          EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
859 }
860 
HasStringPieceFields(const Descriptor * descriptor,const Options & options)861 static bool HasStringPieceFields(const Descriptor* descriptor,
862                                  const Options& options) {
863   for (int i = 0; i < descriptor->field_count(); ++i) {
864     if (IsStringPieceField(descriptor->field(i), options)) return true;
865   }
866   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
867     if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
868   }
869   return false;
870 }
871 
HasStringPieceFields(const FileDescriptor * file,const Options & options)872 bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
873   for (int i = 0; i < file->message_type_count(); ++i) {
874     if (HasStringPieceFields(file->message_type(i), options)) return true;
875   }
876   return false;
877 }
878 
IsCordField(const FieldDescriptor * field,const Options & options)879 static bool IsCordField(const FieldDescriptor* field, const Options& options) {
880   return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
881          EffectiveStringCType(field, options) == FieldOptions::CORD;
882 }
883 
HasCordFields(const Descriptor * descriptor,const Options & options)884 static bool HasCordFields(const Descriptor* descriptor,
885                           const Options& options) {
886   for (int i = 0; i < descriptor->field_count(); ++i) {
887     if (IsCordField(descriptor->field(i), options)) return true;
888   }
889   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
890     if (HasCordFields(descriptor->nested_type(i), options)) return true;
891   }
892   return false;
893 }
894 
HasCordFields(const FileDescriptor * file,const Options & options)895 bool HasCordFields(const FileDescriptor* file, const Options& options) {
896   for (int i = 0; i < file->message_type_count(); ++i) {
897     if (HasCordFields(file->message_type(i), options)) return true;
898   }
899   return false;
900 }
901 
HasExtensionsOrExtendableMessage(const Descriptor * descriptor)902 static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
903   if (descriptor->extension_range_count() > 0) return true;
904   if (descriptor->extension_count() > 0) return true;
905   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
906     if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
907       return true;
908     }
909   }
910   return false;
911 }
912 
HasExtensionsOrExtendableMessage(const FileDescriptor * file)913 bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
914   if (file->extension_count() > 0) return true;
915   for (int i = 0; i < file->message_type_count(); ++i) {
916     if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
917   }
918   return false;
919 }
920 
HasMapFields(const Descriptor * descriptor)921 static bool HasMapFields(const Descriptor* descriptor) {
922   for (int i = 0; i < descriptor->field_count(); ++i) {
923     if (descriptor->field(i)->is_map()) {
924       return true;
925     }
926   }
927   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
928     if (HasMapFields(descriptor->nested_type(i))) return true;
929   }
930   return false;
931 }
932 
HasMapFields(const FileDescriptor * file)933 bool HasMapFields(const FileDescriptor* file) {
934   for (int i = 0; i < file->message_type_count(); ++i) {
935     if (HasMapFields(file->message_type(i))) return true;
936   }
937   return false;
938 }
939 
HasEnumDefinitions(const Descriptor * message_type)940 static bool HasEnumDefinitions(const Descriptor* message_type) {
941   if (message_type->enum_type_count() > 0) return true;
942   for (int i = 0; i < message_type->nested_type_count(); ++i) {
943     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
944   }
945   return false;
946 }
947 
HasEnumDefinitions(const FileDescriptor * file)948 bool HasEnumDefinitions(const FileDescriptor* file) {
949   if (file->enum_type_count() > 0) return true;
950   for (int i = 0; i < file->message_type_count(); ++i) {
951     if (HasEnumDefinitions(file->message_type(i))) return true;
952   }
953   return false;
954 }
955 
IsStringOrMessage(const FieldDescriptor * field)956 bool IsStringOrMessage(const FieldDescriptor* field) {
957   switch (field->cpp_type()) {
958     case FieldDescriptor::CPPTYPE_INT32:
959     case FieldDescriptor::CPPTYPE_INT64:
960     case FieldDescriptor::CPPTYPE_UINT32:
961     case FieldDescriptor::CPPTYPE_UINT64:
962     case FieldDescriptor::CPPTYPE_DOUBLE:
963     case FieldDescriptor::CPPTYPE_FLOAT:
964     case FieldDescriptor::CPPTYPE_BOOL:
965     case FieldDescriptor::CPPTYPE_ENUM:
966       return false;
967     case FieldDescriptor::CPPTYPE_STRING:
968     case FieldDescriptor::CPPTYPE_MESSAGE:
969       return true;
970   }
971 
972   GOOGLE_LOG(FATAL) << "Can't get here.";
973   return false;
974 }
975 
EffectiveStringCType(const FieldDescriptor * field,const Options & options)976 FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
977                                          const Options& options) {
978   GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
979   if (options.opensource_runtime) {
980     // Open-source protobuf release only supports STRING ctype.
981     return FieldOptions::STRING;
982   } else {
983     // Google-internal supports all ctypes.
984     return field->options().ctype();
985   }
986 }
987 
IsAnyMessage(const FileDescriptor * descriptor,const Options & options)988 bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
989   return descriptor->name() == kAnyProtoFile;
990 }
991 
IsAnyMessage(const Descriptor * descriptor,const Options & options)992 bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
993   return descriptor->name() == kAnyMessageName &&
994          IsAnyMessage(descriptor->file(), options);
995 }
996 
IsWellKnownMessage(const FileDescriptor * file)997 bool IsWellKnownMessage(const FileDescriptor* file) {
998   static const std::unordered_set<std::string> well_known_files{
999       "google/protobuf/any.proto",
1000       "google/protobuf/api.proto",
1001       "google/protobuf/compiler/plugin.proto",
1002       "google/protobuf/descriptor.proto",
1003       "google/protobuf/duration.proto",
1004       "google/protobuf/empty.proto",
1005       "google/protobuf/field_mask.proto",
1006       "google/protobuf/source_context.proto",
1007       "google/protobuf/struct.proto",
1008       "google/protobuf/timestamp.proto",
1009       "google/protobuf/type.proto",
1010       "google/protobuf/wrappers.proto",
1011   };
1012   return well_known_files.find(file->name()) != well_known_files.end();
1013 }
1014 
// Reports whether UTF-8 enforcement applies to |field|. This implementation
// answers true unconditionally; neither argument is inspected.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  const bool enforce_utf8 = true;
  return enforce_utf8;
}
1019 
// Reports whether UTF-8 verification applies to |file|. This implementation
// answers true unconditionally; neither argument is inspected.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  const bool verify_utf8 = true;
  return verify_utf8;
}
1024 
1025 // Which level of UTF-8 enforcemant is placed on this file.
GetUtf8CheckMode(const FieldDescriptor * field,const Options & options)1026 Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1027                                const Options& options) {
1028   if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1029       FieldEnforceUtf8(field, options)) {
1030     return Utf8CheckMode::kStrict;
1031   } else if (GetOptimizeFor(field->file(), options) !=
1032                  FileOptions::LITE_RUNTIME &&
1033              FileUtf8Verification(field->file(), options)) {
1034     return Utf8CheckMode::kVerify;
1035   } else {
1036     return Utf8CheckMode::kNone;
1037   }
1038 }
1039 
GenerateUtf8CheckCode(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const char * strict_function,const char * verify_function,const Formatter & format)1040 static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1041                                   const Options& options, bool for_parse,
1042                                   const char* parameters,
1043                                   const char* strict_function,
1044                                   const char* verify_function,
1045                                   const Formatter& format) {
1046   switch (GetUtf8CheckMode(field, options)) {
1047     case Utf8CheckMode::kStrict: {
1048       if (for_parse) {
1049         format("DO_(");
1050       }
1051       format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1052       format.Indent();
1053       format(parameters);
1054       if (for_parse) {
1055         format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1056       } else {
1057         format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1058       }
1059       format("\"$1$\")", field->full_name());
1060       if (for_parse) {
1061         format(")");
1062       }
1063       format(";\n");
1064       format.Outdent();
1065       break;
1066     }
1067     case Utf8CheckMode::kVerify: {
1068       format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1069       format.Indent();
1070       format(parameters);
1071       if (for_parse) {
1072         format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1073       } else {
1074         format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1075       }
1076       format("\"$1$\");\n", field->full_name());
1077       format.Outdent();
1078       break;
1079     }
1080     case Utf8CheckMode::kNone:
1081       break;
1082   }
1083 }
1084 
GenerateUtf8CheckCodeForString(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1085 void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1086                                     const Options& options, bool for_parse,
1087                                     const char* parameters,
1088                                     const Formatter& format) {
1089   GenerateUtf8CheckCode(field, options, for_parse, parameters,
1090                         "VerifyUtf8String", "VerifyUTF8StringNamedField",
1091                         format);
1092 }
1093 
GenerateUtf8CheckCodeForCord(const FieldDescriptor * field,const Options & options,bool for_parse,const char * parameters,const Formatter & format)1094 void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1095                                   const Options& options, bool for_parse,
1096                                   const char* parameters,
1097                                   const Formatter& format) {
1098   GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1099                         "VerifyUTF8CordNamedField", format);
1100 }
1101 
1102 namespace {
1103 
Flatten(const Descriptor * descriptor,std::vector<const Descriptor * > * flatten)1104 void Flatten(const Descriptor* descriptor,
1105              std::vector<const Descriptor*>* flatten) {
1106   for (int i = 0; i < descriptor->nested_type_count(); i++)
1107     Flatten(descriptor->nested_type(i), flatten);
1108   flatten->push_back(descriptor);
1109 }
1110 
1111 }  // namespace
1112 
FlattenMessagesInFile(const FileDescriptor * file,std::vector<const Descriptor * > * result)1113 void FlattenMessagesInFile(const FileDescriptor* file,
1114                            std::vector<const Descriptor*>* result) {
1115   for (int i = 0; i < file->message_type_count(); i++) {
1116     Flatten(file->message_type(i), result);
1117   }
1118 }
1119 
HasWeakFields(const Descriptor * descriptor,const Options & options)1120 bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1121   for (int i = 0; i < descriptor->field_count(); i++) {
1122     if (IsWeak(descriptor->field(i), options)) return true;
1123   }
1124   return false;
1125 }
1126 
HasWeakFields(const FileDescriptor * file,const Options & options)1127 bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1128   for (int i = 0; i < file->message_type_count(); ++i) {
1129     if (HasWeakFields(file->message_type(i), options)) return true;
1130   }
1131   return false;
1132 }
1133 
UsingImplicitWeakFields(const FileDescriptor * file,const Options & options)1134 bool UsingImplicitWeakFields(const FileDescriptor* file,
1135                              const Options& options) {
1136   return options.lite_implicit_weak_fields &&
1137          GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1138 }
1139 
IsImplicitWeakField(const FieldDescriptor * field,const Options & options,MessageSCCAnalyzer * scc_analyzer)1140 bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1141                          MessageSCCAnalyzer* scc_analyzer) {
1142   return UsingImplicitWeakFields(field->file(), options) &&
1143          field->type() == FieldDescriptor::TYPE_MESSAGE &&
1144          !field->is_required() && !field->is_map() && !field->is_extension() &&
1145          !field->real_containing_oneof() &&
1146          !IsWellKnownMessage(field->message_type()->file()) &&
1147          field->message_type()->file()->name() !=
1148              "net/proto2/proto/descriptor.proto" &&
1149          // We do not support implicit weak fields between messages in the same
1150          // strongly-connected component.
1151          scc_analyzer->GetSCC(field->containing_type()) !=
1152              scc_analyzer->GetSCC(field->message_type());
1153 }
1154 
GetSCCAnalysis(const SCC * scc)1155 MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1156   if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1157   MessageAnalysis result{};
1158   if (UsingImplicitWeakFields(scc->GetFile(), options_)) {
1159     result.contains_weak = true;
1160   }
1161   for (int i = 0; i < scc->descriptors.size(); i++) {
1162     const Descriptor* descriptor = scc->descriptors[i];
1163     if (descriptor->extension_range_count() > 0) {
1164       result.contains_extension = true;
1165     }
1166     for (int i = 0; i < descriptor->field_count(); i++) {
1167       const FieldDescriptor* field = descriptor->field(i);
1168       if (field->is_required()) {
1169         result.contains_required = true;
1170       }
1171       if (field->options().weak()) {
1172         result.contains_weak = true;
1173       }
1174       switch (field->type()) {
1175         case FieldDescriptor::TYPE_STRING:
1176         case FieldDescriptor::TYPE_BYTES: {
1177           if (field->options().ctype() == FieldOptions::CORD) {
1178             result.contains_cord = true;
1179           }
1180           break;
1181         }
1182         case FieldDescriptor::TYPE_GROUP:
1183         case FieldDescriptor::TYPE_MESSAGE: {
1184           const SCC* child = analyzer_.GetSCC(field->message_type());
1185           if (child != scc) {
1186             MessageAnalysis analysis = GetSCCAnalysis(child);
1187             result.contains_cord |= analysis.contains_cord;
1188             result.contains_extension |= analysis.contains_extension;
1189             if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1190               result.contains_required |= analysis.contains_required;
1191             }
1192             result.contains_weak |= analysis.contains_weak;
1193           } else {
1194             // This field points back into the same SCC hence the messages
1195             // in the SCC are recursive. Note if SCC contains more than two
1196             // nodes it has to be recursive, however this test also works for
1197             // a single node that is recursive.
1198             result.is_recursive = true;
1199           }
1200           break;
1201         }
1202         default:
1203           break;
1204       }
1205     }
1206   }
1207   // We deliberately only insert the result here. After we contracted the SCC
1208   // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1209   // nodes visited as we can never return to them. By inserting them here
1210   // we will go in an infinite loop if the SCC is not correct.
1211   return analysis_cache_[scc] = result;
1212 }
1213 
ListAllFields(const Descriptor * d,std::vector<const FieldDescriptor * > * fields)1214 void ListAllFields(const Descriptor* d,
1215                    std::vector<const FieldDescriptor*>* fields) {
1216   // Collect sub messages
1217   for (int i = 0; i < d->nested_type_count(); i++) {
1218     ListAllFields(d->nested_type(i), fields);
1219   }
1220   // Collect message level extensions.
1221   for (int i = 0; i < d->extension_count(); i++) {
1222     fields->push_back(d->extension(i));
1223   }
1224   // Add types of fields necessary
1225   for (int i = 0; i < d->field_count(); i++) {
1226     fields->push_back(d->field(i));
1227   }
1228 }
1229 
ListAllFields(const FileDescriptor * d,std::vector<const FieldDescriptor * > * fields)1230 void ListAllFields(const FileDescriptor* d,
1231                    std::vector<const FieldDescriptor*>* fields) {
1232   // Collect file level message.
1233   for (int i = 0; i < d->message_type_count(); i++) {
1234     ListAllFields(d->message_type(i), fields);
1235   }
1236   // Collect message level extensions.
1237   for (int i = 0; i < d->extension_count(); i++) {
1238     fields->push_back(d->extension(i));
1239   }
1240 }
1241 
ListAllTypesForServices(const FileDescriptor * fd,std::vector<const Descriptor * > * types)1242 void ListAllTypesForServices(const FileDescriptor* fd,
1243                              std::vector<const Descriptor*>* types) {
1244   for (int i = 0; i < fd->service_count(); i++) {
1245     const ServiceDescriptor* sd = fd->service(i);
1246     for (int j = 0; j < sd->method_count(); j++) {
1247       const MethodDescriptor* method = sd->method(j);
1248       types->push_back(method->input_type());
1249       types->push_back(method->output_type());
1250     }
1251   }
1252 }
1253 
GetBootstrapBasename(const Options & options,const std::string & basename,std::string * bootstrap_basename)1254 bool GetBootstrapBasename(const Options& options, const std::string& basename,
1255                           std::string* bootstrap_basename) {
1256   if (options.opensource_runtime) {
1257     return false;
1258   }
1259 
1260   std::unordered_map<std::string, std::string> bootstrap_mapping{
1261       {"net/proto2/proto/descriptor",
1262        "net/proto2/internal/descriptor"},
1263       {"net/proto2/compiler/proto/plugin",
1264        "net/proto2/compiler/proto/plugin"},
1265       {"net/proto2/compiler/proto/profile",
1266        "net/proto2/compiler/proto/profile_bootstrap"},
1267   };
1268   auto iter = bootstrap_mapping.find(basename);
1269   if (iter == bootstrap_mapping.end()) {
1270     *bootstrap_basename = basename;
1271     return false;
1272   } else {
1273     *bootstrap_basename = iter->second;
1274     return true;
1275   }
1276 }
1277 
IsBootstrapProto(const Options & options,const FileDescriptor * file)1278 bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1279   std::string my_name = StripProto(file->name());
1280   return GetBootstrapBasename(options, my_name, &my_name);
1281 }
1282 
MaybeBootstrap(const Options & options,GeneratorContext * generator_context,bool bootstrap_flag,std::string * basename)1283 bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1284                     bool bootstrap_flag, std::string* basename) {
1285   std::string bootstrap_basename;
1286   if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1287     return false;
1288   }
1289 
1290   if (bootstrap_flag) {
1291     // Adjust basename, but don't abort code generation.
1292     *basename = bootstrap_basename;
1293     return false;
1294   } else {
1295     std::string forward_to_basename = bootstrap_basename;
1296 
1297     // Generate forwarding headers and empty .pb.cc.
1298     {
1299       std::unique_ptr<io::ZeroCopyOutputStream> output(
1300           generator_context->Open(*basename + ".pb.h"));
1301       io::Printer printer(output.get(), '$', nullptr);
1302       printer.Print(
1303           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1304           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1305           "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1306           "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1307           "forward_to_basename", forward_to_basename, "filename_identifier",
1308           FilenameIdentifier(*basename));
1309 
1310       if (!options.opensource_runtime) {
1311         // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1312         // protocoltype is SWIG'ed and we need to forward
1313         if (*basename == "net/proto/protocoltype") {
1314           printer.Print(
1315               "#ifdef SWIG\n"
1316               "%include \"$forward_to_basename$.pb.h\"\n"
1317               "#endif  // SWIG\n",
1318               "forward_to_basename", forward_to_basename);
1319         }
1320       }
1321     }
1322 
1323     {
1324       std::unique_ptr<io::ZeroCopyOutputStream> output(
1325           generator_context->Open(*basename + ".proto.h"));
1326       io::Printer printer(output.get(), '$', nullptr);
1327       printer.Print(
1328           "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1329           "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1330           "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1331           "export\n"
1332           "#endif  // "
1333           "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1334           "forward_to_basename", forward_to_basename, "filename_identifier",
1335           FilenameIdentifier(*basename));
1336     }
1337 
1338     {
1339       std::unique_ptr<io::ZeroCopyOutputStream> output(
1340           generator_context->Open(*basename + ".pb.cc"));
1341       io::Printer printer(output.get(), '$', nullptr);
1342       printer.Print("\n");
1343     }
1344 
1345     {
1346       std::unique_ptr<io::ZeroCopyOutputStream> output(
1347           generator_context->Open(*basename + ".pb.h.meta"));
1348     }
1349 
1350     {
1351       std::unique_ptr<io::ZeroCopyOutputStream> output(
1352           generator_context->Open(*basename + ".proto.h.meta"));
1353     }
1354 
1355     // Abort code generation.
1356     return true;
1357   }
1358 }
1359 
HasExtensionFromFile(const Message & msg,const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1360 static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1361                                  const Options& options,
1362                                  bool* has_opt_codesize_extension) {
1363   std::vector<const FieldDescriptor*> fields;
1364   auto reflection = msg.GetReflection();
1365   reflection->ListFields(msg, &fields);
1366   for (auto field : fields) {
1367     const auto* field_msg = field->message_type();
1368     if (field_msg == nullptr) {
1369       // It so happens that enums Is_Valid are still generated so enums work.
1370       // Only messages have potential problems.
1371       continue;
1372     }
1373     // If this option has an extension set AND that extension is defined in the
1374     // same file we have bootstrap problem.
1375     if (field->is_extension()) {
1376       const auto* msg_extension_file = field->message_type()->file();
1377       if (msg_extension_file == file) return true;
1378       if (has_opt_codesize_extension &&
1379           GetOptimizeFor(msg_extension_file, options) ==
1380               FileOptions::CODE_SIZE) {
1381         *has_opt_codesize_extension = true;
1382       }
1383     }
1384     // Recurse in this field to see if there is a problem in there
1385     if (field->is_repeated()) {
1386       for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1387         if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1388                                  file, options, has_opt_codesize_extension)) {
1389           return true;
1390         }
1391       }
1392     } else {
1393       if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1394                                options, has_opt_codesize_extension)) {
1395         return true;
1396       }
1397     }
1398   }
1399   return false;
1400 }
1401 
HasBootstrapProblem(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1402 static bool HasBootstrapProblem(const FileDescriptor* file,
1403                                 const Options& options,
1404                                 bool* has_opt_codesize_extension) {
1405   static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1406   auto it = cache.find(file);
1407   if (it != cache.end()) return it->second;
1408   // In order to build the data structures for the reflective parse, it needs
1409   // to parse the serialized descriptor describing all the messages defined in
1410   // this file. Obviously this presents a bootstrap problem for descriptor
1411   // messages.
1412   if (file->name() == "net/proto2/proto/descriptor.proto" ||
1413       file->name() == "google/protobuf/descriptor.proto") {
1414     return true;
1415   }
1416   // Unfortunately we're not done yet. The descriptor option messages allow
1417   // for extensions. So we need to be able to parse these extensions in order
1418   // to parse the file descriptor for a file that has custom options. This is a
1419   // problem when these custom options extensions are defined in the same file.
1420   FileDescriptorProto linkedin_fd_proto;
1421   const DescriptorPool* pool = file->pool();
1422   const Descriptor* fd_proto_descriptor =
1423       pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1424   // Not all pools have descriptor.proto in them. In these cases there for sure
1425   // are no custom options.
1426   if (fd_proto_descriptor == nullptr) return false;
1427 
1428   // It's easier to inspect file as a proto, because we can use reflection on
1429   // the proto to iterate over all content.
1430   file->CopyTo(&linkedin_fd_proto);
1431 
1432   // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1433   // such it doesn't know the extensions that are potentially present in the
1434   // descriptor pool constructed from the protos that are being compiled. These
1435   // custom options are therefore in the unknown fields.
1436   // By building the corresponding FileDescriptorProto in the pool constructed
1437   // by the protos that are being compiled, ie. file's pool, the unknown fields
1438   // are converted to extensions.
1439   DynamicMessageFactory factory(pool);
1440   Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1441   fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1442 
1443   bool& res = cache[file];
1444   res = HasExtensionFromFile(*fd_proto, file, options,
1445                              has_opt_codesize_extension);
1446   delete fd_proto;
1447   return res;
1448 }
1449 
GetOptimizeFor(const FileDescriptor * file,const Options & options,bool * has_opt_codesize_extension)1450 FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1451                                         const Options& options,
1452                                         bool* has_opt_codesize_extension) {
1453   if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1454   switch (options.enforce_mode) {
1455     case EnforceOptimizeMode::kSpeed:
1456       return FileOptions::SPEED;
1457     case EnforceOptimizeMode::kLiteRuntime:
1458       return FileOptions::LITE_RUNTIME;
1459     case EnforceOptimizeMode::kCodeSize:
1460       if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1461         return FileOptions::LITE_RUNTIME;
1462       }
1463       if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1464         return FileOptions::SPEED;
1465       }
1466       return FileOptions::CODE_SIZE;
1467     case EnforceOptimizeMode::kNoEnforcement:
1468       if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1469         if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1470           GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1471                           "cannot honor that because it contains custom option "
1472                           "extensions defined in the same proto.";
1473           return FileOptions::SPEED;
1474         }
1475       }
1476       return file->options().optimize_for();
1477   }
1478 
1479   GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
1480   // The phony return below serves to silence a warning from GCC 8.
1481   return FileOptions::SPEED;
1482 }
1483 
1484 }  // namespace cpp
1485 }  // namespace compiler
1486 }  // namespace protobuf
1487 }  // namespace google
1488