1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <limits>
36 #include <map>
37 #include <vector>
38 #include <google/protobuf/stubs/hash.h>
39 
40 #include <google/protobuf/compiler/cpp/cpp_helpers.h>
41 #include <google/protobuf/io/printer.h>
42 #include <google/protobuf/stubs/common.h>
43 #include <google/protobuf/stubs/strutil.h>
44 #include <google/protobuf/stubs/substitute.h>
45 
46 
47 namespace google {
48 namespace protobuf {
49 namespace compiler {
50 namespace cpp {
51 
52 namespace {
53 
// Returns a copy of |name| in which every "." has been replaced by "_".
string DotsToUnderscores(const string& name) {
  const bool replace_all = true;
  const string flattened = StringReplace(name, ".", "_", replace_all);
  return flattened;
}
57 
// Returns a copy of |name| in which every "." has been replaced by "::".
string DotsToColons(const string& name) {
  const bool replace_all = true;
  const string scoped = StringReplace(name, ".", "::", replace_all);
  return scoped;
}
61 
// C++ keywords, including the alternative operator spellings ("and", "or",
// "not", ...).  MakeKeywordsMap() copies these entries into kKeywords, which
// FieldName() and SafeFunctionName() consult to decide whether a lower-cased
// field name needs a trailing underscore.
//
// NOTE(review): the list contains no C++11 keywords (e.g. "alignas",
// "constexpr", "decltype", "noexcept", "nullptr", "static_assert",
// "thread_local") and does not contain "export" -- confirm whether that is
// intentional before relying on it for newer language standards.
const char* const kKeywordList[] = {
  "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case",
  "catch", "char", "class", "compl", "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast", "else", "enum",
  "explicit", "extern", "false", "float", "for", "friend", "goto", "if",
  "inline", "int", "long", "mutable", "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq", "private", "protected", "public", "register",
  "reinterpret_cast", "return", "short", "signed", "sizeof", "static",
  "static_cast", "struct", "switch", "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "union", "unsigned", "using", "virtual",
  "void", "volatile", "wchar_t", "while", "xor", "xor_eq"
};
74 
MakeKeywordsMap()75 hash_set<string> MakeKeywordsMap() {
76   hash_set<string> result;
77   for (int i = 0; i < GOOGLE_ARRAYSIZE(kKeywordList); i++) {
78     result.insert(kKeywordList[i]);
79   }
80   return result;
81 }
82 
83 hash_set<string> kKeywords = MakeKeywordsMap();
84 
85 // Returns whether the provided descriptor has an extension. This includes its
86 // nested types.
HasExtension(const Descriptor * descriptor)87 bool HasExtension(const Descriptor* descriptor) {
88   if (descriptor->extension_count() > 0) {
89     return true;
90   }
91   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
92     if (HasExtension(descriptor->nested_type(i))) {
93       return true;
94     }
95   }
96   return false;
97 }
98 
99 }  // namespace
100 
// Converts |input| to camel case.
//
// Every character that is not an ASCII letter or digit is dropped and causes
// the next lower-case letter to be capitalized.  Digits are kept and also
// capitalize the letter that follows them.  Upper-case letters are copied
// unchanged.  |cap_next_letter| gives the initial state: when true, a leading
// lower-case letter is capitalized.
//
// Examples: ("foo_bar", true) -> "FooBar", ("foo_bar", false) -> "fooBar",
// ("foo1bar", false) -> "foo1Bar".
string UnderscoresToCamelCase(const string& input, bool cap_next_letter) {
  string result;
  // The output is never longer than the input.
  result.reserve(input.size());
  // Note:  ctype.h is deliberately avoided because its classification
  //   functions depend on the current locale.
  for (string::size_type i = 0; i < input.size(); ++i) {
    const char c = input[i];
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else {
      // Digits are preserved; every other character acts purely as a
      // separator.  Either way the next letter starts a new word.
      if ('0' <= c && c <= '9') {
        result += c;
      }
      cap_next_letter = true;
    }
  }
  return result;
}
125 
// Separator lines written as C++ line comments.  Each string starts with
// "// " and ends with a newline; the "thick" variant is made of '='
// characters and the "thin" variant of '-' characters.
// NOTE(review): presumably printed into generated files to visually divide
// sections -- confirm against the callers.
const char kThickSeparator[] =
  "// ===================================================================\n";
const char kThinSeparator[] =
  "// -------------------------------------------------------------------\n";
130 
ClassName(const Descriptor * descriptor,bool qualified)131 string ClassName(const Descriptor* descriptor, bool qualified) {
132 
133   // Find "outer", the descriptor of the top-level message in which
134   // "descriptor" is embedded.
135   const Descriptor* outer = descriptor;
136   while (outer->containing_type() != NULL) outer = outer->containing_type();
137 
138   const string& outer_name = outer->full_name();
139   string inner_name = descriptor->full_name().substr(outer_name.size());
140 
141   if (qualified) {
142     return "::" + DotsToColons(outer_name) + DotsToUnderscores(inner_name);
143   } else {
144     return outer->name() + DotsToUnderscores(inner_name);
145   }
146 }
147 
ClassName(const EnumDescriptor * enum_descriptor,bool qualified)148 string ClassName(const EnumDescriptor* enum_descriptor, bool qualified) {
149   if (enum_descriptor->containing_type() == NULL) {
150     if (qualified) {
151       return "::" + DotsToColons(enum_descriptor->full_name());
152     } else {
153       return enum_descriptor->name();
154     }
155   } else {
156     string result = ClassName(enum_descriptor->containing_type(), qualified);
157     result += '_';
158     result += enum_descriptor->name();
159     return result;
160   }
161 }
162 
163 
SuperClassName(const Descriptor * descriptor)164 string SuperClassName(const Descriptor* descriptor) {
165   return HasDescriptorMethods(descriptor->file()) ?
166       "::google::protobuf::Message" : "::google::protobuf::MessageLite";
167 }
168 
FieldName(const FieldDescriptor * field)169 string FieldName(const FieldDescriptor* field) {
170   string result = field->name();
171   LowerString(&result);
172   if (kKeywords.count(result) > 0) {
173     result.append("_");
174   }
175   return result;
176 }
177 
FieldConstantName(const FieldDescriptor * field)178 string FieldConstantName(const FieldDescriptor *field) {
179   string field_name = UnderscoresToCamelCase(field->name(), true);
180   string result = "k" + field_name + "FieldNumber";
181 
182   if (!field->is_extension() &&
183       field->containing_type()->FindFieldByCamelcaseName(
184         field->camelcase_name()) != field) {
185     // This field's camelcase name is not unique.  As a hack, add the field
186     // number to the constant name.  This makes the constant rather useless,
187     // but what can we do?
188     result += "_" + SimpleItoa(field->number());
189   }
190 
191   return result;
192 }
193 
FieldMessageTypeName(const FieldDescriptor * field)194 string FieldMessageTypeName(const FieldDescriptor* field) {
195   // Note:  The Google-internal version of Protocol Buffers uses this function
196   //   as a hook point for hacks to support legacy code.
197   return ClassName(field->message_type(), true);
198 }
199 
// Strips the proto extension from |filename|: ".protodevel" when the name
// ends with it, otherwise ".proto" (as handled by StripSuffixString()).
string StripProto(const string& filename) {
  const char* const suffix =
      HasSuffixString(filename, ".protodevel") ? ".protodevel" : ".proto";
  return StripSuffixString(filename, suffix);
}
207 
PrimitiveTypeName(FieldDescriptor::CppType type)208 const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
209   switch (type) {
210     case FieldDescriptor::CPPTYPE_INT32  : return "::google::protobuf::int32";
211     case FieldDescriptor::CPPTYPE_INT64  : return "::google::protobuf::int64";
212     case FieldDescriptor::CPPTYPE_UINT32 : return "::google::protobuf::uint32";
213     case FieldDescriptor::CPPTYPE_UINT64 : return "::google::protobuf::uint64";
214     case FieldDescriptor::CPPTYPE_DOUBLE : return "double";
215     case FieldDescriptor::CPPTYPE_FLOAT  : return "float";
216     case FieldDescriptor::CPPTYPE_BOOL   : return "bool";
217     case FieldDescriptor::CPPTYPE_ENUM   : return "int";
218     case FieldDescriptor::CPPTYPE_STRING : return "::std::string";
219     case FieldDescriptor::CPPTYPE_MESSAGE: return NULL;
220 
221     // No default because we want the compiler to complain if any new
222     // CppTypes are added.
223   }
224 
225   GOOGLE_LOG(FATAL) << "Can't get here.";
226   return NULL;
227 }
228 
DeclaredTypeMethodName(FieldDescriptor::Type type)229 const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
230   switch (type) {
231     case FieldDescriptor::TYPE_INT32   : return "Int32";
232     case FieldDescriptor::TYPE_INT64   : return "Int64";
233     case FieldDescriptor::TYPE_UINT32  : return "UInt32";
234     case FieldDescriptor::TYPE_UINT64  : return "UInt64";
235     case FieldDescriptor::TYPE_SINT32  : return "SInt32";
236     case FieldDescriptor::TYPE_SINT64  : return "SInt64";
237     case FieldDescriptor::TYPE_FIXED32 : return "Fixed32";
238     case FieldDescriptor::TYPE_FIXED64 : return "Fixed64";
239     case FieldDescriptor::TYPE_SFIXED32: return "SFixed32";
240     case FieldDescriptor::TYPE_SFIXED64: return "SFixed64";
241     case FieldDescriptor::TYPE_FLOAT   : return "Float";
242     case FieldDescriptor::TYPE_DOUBLE  : return "Double";
243 
244     case FieldDescriptor::TYPE_BOOL    : return "Bool";
245     case FieldDescriptor::TYPE_ENUM    : return "Enum";
246 
247     case FieldDescriptor::TYPE_STRING  : return "String";
248     case FieldDescriptor::TYPE_BYTES   : return "Bytes";
249     case FieldDescriptor::TYPE_GROUP   : return "Group";
250     case FieldDescriptor::TYPE_MESSAGE : return "Message";
251 
252     // No default because we want the compiler to complain if any new
253     // types are added.
254   }
255   GOOGLE_LOG(FATAL) << "Can't get here.";
256   return "";
257 }
258 
Int32ToString(int number)259 string Int32ToString(int number) {
260   // gcc rejects the decimal form of kint32min.
261   if (number == kint32min) {
262     GOOGLE_COMPILE_ASSERT(kint32min == (~0x7fffffff), kint32min_value_error);
263     return "(~0x7fffffff)";
264   } else {
265     return SimpleItoa(number);
266   }
267 }
268 
Int64ToString(int64 number)269 string Int64ToString(int64 number) {
270   // gcc rejects the decimal form of kint64min
271   if (number == kint64min) {
272     // Make sure we are in a 2's complement system.
273     GOOGLE_COMPILE_ASSERT(kint64min == GOOGLE_LONGLONG(~0x7fffffffffffffff),
274                    kint64min_value_error);
275     return "GOOGLE_LONGLONG(~0x7fffffffffffffff)";
276   }
277   return "GOOGLE_LONGLONG(" + SimpleItoa(number) + ")";
278 }
279 
DefaultValue(const FieldDescriptor * field)280 string DefaultValue(const FieldDescriptor* field) {
281   switch (field->cpp_type()) {
282     case FieldDescriptor::CPPTYPE_INT32:
283       return Int32ToString(field->default_value_int32());
284     case FieldDescriptor::CPPTYPE_UINT32:
285       return SimpleItoa(field->default_value_uint32()) + "u";
286     case FieldDescriptor::CPPTYPE_INT64:
287       return Int64ToString(field->default_value_int64());
288     case FieldDescriptor::CPPTYPE_UINT64:
289       return "GOOGLE_ULONGLONG(" + SimpleItoa(field->default_value_uint64())+ ")";
290     case FieldDescriptor::CPPTYPE_DOUBLE: {
291       double value = field->default_value_double();
292       if (value == numeric_limits<double>::infinity()) {
293         return "::google::protobuf::internal::Infinity()";
294       } else if (value == -numeric_limits<double>::infinity()) {
295         return "-::google::protobuf::internal::Infinity()";
296       } else if (value != value) {
297         return "::google::protobuf::internal::NaN()";
298       } else {
299         return SimpleDtoa(value);
300       }
301     }
302     case FieldDescriptor::CPPTYPE_FLOAT:
303       {
304         float value = field->default_value_float();
305         if (value == numeric_limits<float>::infinity()) {
306           return "static_cast<float>(::google::protobuf::internal::Infinity())";
307         } else if (value == -numeric_limits<float>::infinity()) {
308           return "static_cast<float>(-::google::protobuf::internal::Infinity())";
309         } else if (value != value) {
310           return "static_cast<float>(::google::protobuf::internal::NaN())";
311         } else {
312           string float_value = SimpleFtoa(value);
313           // If floating point value contains a period (.) or an exponent
314           // (either E or e), then append suffix 'f' to make it a float
315           // literal.
316           if (float_value.find_first_of(".eE") != string::npos) {
317             float_value.push_back('f');
318           }
319           return float_value;
320         }
321       }
322     case FieldDescriptor::CPPTYPE_BOOL:
323       return field->default_value_bool() ? "true" : "false";
324     case FieldDescriptor::CPPTYPE_ENUM:
325       // Lazy:  Generate a static_cast because we don't have a helper function
326       //   that constructs the full name of an enum value.
327       return strings::Substitute(
328           "static_cast< $0 >($1)",
329           ClassName(field->enum_type(), true),
330           Int32ToString(field->default_value_enum()->number()));
331     case FieldDescriptor::CPPTYPE_STRING:
332       return "\"" + EscapeTrigraphs(
333         CEscape(field->default_value_string())) +
334         "\"";
335     case FieldDescriptor::CPPTYPE_MESSAGE:
336       return FieldMessageTypeName(field) + "::default_instance()";
337   }
338   // Can't actually get here; make compiler happy.  (We could add a default
339   // case above but then we wouldn't get the nice compiler warning when a
340   // new type is added.)
341   GOOGLE_LOG(FATAL) << "Can't get here.";
342   return "";
343 }
344 
345 // Convert a file name into a valid identifier.
FilenameIdentifier(const string & filename)346 string FilenameIdentifier(const string& filename) {
347   string result;
348   for (int i = 0; i < filename.size(); i++) {
349     if (ascii_isalnum(filename[i])) {
350       result.push_back(filename[i]);
351     } else {
352       // Not alphanumeric.  To avoid any possibility of name conflicts we
353       // use the hex code for the character.
354       result.push_back('_');
355       char buffer[kFastToBufferSize];
356       result.append(FastHexToBuffer(static_cast<uint8>(filename[i]), buffer));
357     }
358   }
359   return result;
360 }
361 
362 // Return the name of the AddDescriptors() function for a given file.
GlobalAddDescriptorsName(const string & filename)363 string GlobalAddDescriptorsName(const string& filename) {
364   return "protobuf_AddDesc_" + FilenameIdentifier(filename);
365 }
366 
367 // Return the name of the AssignDescriptors() function for a given file.
GlobalAssignDescriptorsName(const string & filename)368 string GlobalAssignDescriptorsName(const string& filename) {
369   return "protobuf_AssignDesc_" + FilenameIdentifier(filename);
370 }
371 
372 // Return the name of the ShutdownFile() function for a given file.
GlobalShutdownFileName(const string & filename)373 string GlobalShutdownFileName(const string& filename) {
374   return "protobuf_ShutdownFile_" + FilenameIdentifier(filename);
375 }
376 
377 // Return the qualified C++ name for a file level symbol.
QualifiedFileLevelSymbol(const string & package,const string & name)378 string QualifiedFileLevelSymbol(const string& package, const string& name) {
379   if (package.empty()) {
380     return StrCat("::", name);
381   }
382   return StrCat("::", DotsToColons(package), "::", name);
383 }
384 
385 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const string & to_escape)386 string EscapeTrigraphs(const string& to_escape) {
387   return StringReplace(to_escape, "?", "\\?", true);
388 }
389 
390 // Escaped function name to eliminate naming conflict.
SafeFunctionName(const Descriptor * descriptor,const FieldDescriptor * field,const string & prefix)391 string SafeFunctionName(const Descriptor* descriptor,
392                         const FieldDescriptor* field,
393                         const string& prefix) {
394   // Do not use FieldName() since it will escape keywords.
395   string name = field->name();
396   LowerString(&name);
397   string function_name = prefix + name;
398   if (descriptor->FindFieldByName(function_name)) {
399     // Single underscore will also make it conflicting with the private data
400     // member. We use double underscore to escape function names.
401     function_name.append("__");
402   } else if (kKeywords.count(name) > 0) {
403     // If the field name is a keyword, we append the underscore back to keep it
404     // consistent with other function names.
405     function_name.append("_");
406   }
407   return function_name;
408 }
409 
StaticInitializersForced(const FileDescriptor * file)410 bool StaticInitializersForced(const FileDescriptor* file) {
411   if (HasDescriptorMethods(file) || file->extension_count() > 0) {
412     return true;
413   }
414   for (int i = 0; i < file->message_type_count(); ++i) {
415     if (HasExtension(file->message_type(i))) {
416       return true;
417     }
418   }
419   return false;
420 }
421 
PrintHandlingOptionalStaticInitializers(const FileDescriptor * file,io::Printer * printer,const char * with_static_init,const char * without_static_init,const char * var1,const string & val1,const char * var2,const string & val2)422 void PrintHandlingOptionalStaticInitializers(
423     const FileDescriptor* file, io::Printer* printer,
424     const char* with_static_init, const char* without_static_init,
425     const char* var1, const string& val1,
426     const char* var2, const string& val2) {
427   map<string, string> vars;
428   if (var1) {
429     vars[var1] = val1;
430   }
431   if (var2) {
432     vars[var2] = val2;
433   }
434   PrintHandlingOptionalStaticInitializers(
435       vars, file, printer, with_static_init, without_static_init);
436 }
437 
PrintHandlingOptionalStaticInitializers(const map<string,string> & vars,const FileDescriptor * file,io::Printer * printer,const char * with_static_init,const char * without_static_init)438 void PrintHandlingOptionalStaticInitializers(
439     const map<string, string>& vars, const FileDescriptor* file,
440     io::Printer* printer, const char* with_static_init,
441     const char* without_static_init) {
442   if (StaticInitializersForced(file)) {
443     printer->Print(vars, with_static_init);
444   } else {
445     printer->Print(vars, (string(
446       "#ifdef GOOGLE_PROTOBUF_NO_STATIC_INITIALIZER\n") +
447       without_static_init +
448       "#else\n" +
449       with_static_init +
450       "#endif\n").c_str());
451   }
452 }
453 
454 
HasEnumDefinitions(const Descriptor * message_type)455 static bool HasEnumDefinitions(const Descriptor* message_type) {
456   if (message_type->enum_type_count() > 0) return true;
457   for (int i = 0; i < message_type->nested_type_count(); ++i) {
458     if (HasEnumDefinitions(message_type->nested_type(i))) return true;
459   }
460   return false;
461 }
462 
HasEnumDefinitions(const FileDescriptor * file)463 bool HasEnumDefinitions(const FileDescriptor* file) {
464   if (file->enum_type_count() > 0) return true;
465   for (int i = 0; i < file->message_type_count(); ++i) {
466     if (HasEnumDefinitions(file->message_type(i))) return true;
467   }
468   return false;
469 }
470 
IsStringOrMessage(const FieldDescriptor * field)471 bool IsStringOrMessage(const FieldDescriptor* field) {
472   switch (field->cpp_type()) {
473     case FieldDescriptor::CPPTYPE_INT32:
474     case FieldDescriptor::CPPTYPE_INT64:
475     case FieldDescriptor::CPPTYPE_UINT32:
476     case FieldDescriptor::CPPTYPE_UINT64:
477     case FieldDescriptor::CPPTYPE_DOUBLE:
478     case FieldDescriptor::CPPTYPE_FLOAT:
479     case FieldDescriptor::CPPTYPE_BOOL:
480     case FieldDescriptor::CPPTYPE_ENUM:
481       return false;
482     case FieldDescriptor::CPPTYPE_STRING:
483     case FieldDescriptor::CPPTYPE_MESSAGE:
484       return true;
485   }
486 
487   GOOGLE_LOG(FATAL) << "Can't get here.";
488   return false;
489 }
490 
491 }  // namespace cpp
492 }  // namespace compiler
493 }  // namespace protobuf
494 }  // namespace google
495