1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/java/java_helpers.h>
36 
37 #include <algorithm>
38 #include <cstdint>
39 #include <limits>
40 #include <unordered_set>
41 #include <vector>
42 
43 #include <google/protobuf/stubs/stringprintf.h>
44 #include <google/protobuf/compiler/java/java_name_resolver.h>
45 #include <google/protobuf/compiler/java/java_names.h>
46 #include <google/protobuf/descriptor.pb.h>
47 #include <google/protobuf/wire_format.h>
48 #include <google/protobuf/stubs/strutil.h>
49 #include <google/protobuf/stubs/substitute.h>
50 #include <google/protobuf/stubs/hash.h>  // for hash<T *>
51 
52 namespace google {
53 namespace protobuf {
54 namespace compiler {
55 namespace java {
56 
57 using internal::WireFormat;
58 using internal::WireFormatLite;
59 
// Comment-style separator lines, each terminated by a newline.
// kThickSeparator is a row of '=' and kThinSeparator a row of '-'.
// Presumably emitted into generated code to divide sections -- confirm at the
// call sites.
const char kThickSeparator[] =
    "// ===================================================================\n";
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
64 
65 namespace {
66 
// Package prefix that FileJavaPackage() starts from when a file does not set
// the java_package option.  It is empty, so no prefix is added.
const char* kDefaultPackage = "";
68 
// Names that should be avoided as field names.
// Using them would make the compiler generate accessors whose names collide
// with methods defined in base classes.  FieldName() marks a matching name by
// appending "#" to it.
const char* kForbiddenWordList[] = {
    // message base class:
    "cached_size",
    "serialized_size",
    // java.lang.Object:
    "class",
};
79 
// Java keywords consulted by UnderscoresToCamelCaseCheckReserved(): a
// camel-cased field name found in this set gets a trailing "_" appended.
// The set is allocated with `new` during static initialization.
const std::unordered_set<std::string>* kReservedNames =
    new std::unordered_set<std::string>({
        "abstract",   "assert",       "boolean",   "break",      "byte",
        "case",       "catch",        "char",      "class",      "const",
        "continue",   "default",      "do",        "double",     "else",
        "enum",       "extends",      "final",     "finally",    "float",
        "for",        "goto",         "if",        "implements", "import",
        "instanceof", "int",          "interface", "long",       "native",
        "new",        "package",      "private",   "protected",  "public",
        "return",     "short",        "static",    "strictfp",   "super",
        "switch",     "synchronized", "this",      "throw",      "throws",
        "transient",  "try",          "void",      "volatile",   "while",
    });
93 
IsForbidden(const std::string & field_name)94 bool IsForbidden(const std::string& field_name) {
95   for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
96     if (field_name == kForbiddenWordList[i]) {
97       return true;
98     }
99   }
100   return false;
101 }
102 
FieldName(const FieldDescriptor * field)103 std::string FieldName(const FieldDescriptor* field) {
104   std::string field_name;
105   // Groups are hacky:  The name of the field is just the lower-cased name
106   // of the group type.  In Java, though, we would like to retain the original
107   // capitalization of the type name.
108   if (GetType(field) == FieldDescriptor::TYPE_GROUP) {
109     field_name = field->message_type()->name();
110   } else {
111     field_name = field->name();
112   }
113   if (IsForbidden(field_name)) {
114     // Append a trailing "#" to indicate that the name should be decorated to
115     // avoid collision with other names.
116     field_name += "#";
117   }
118   return field_name;
119 }
120 
121 
122 }  // namespace
123 
PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)124 void PrintGeneratedAnnotation(io::Printer* printer, char delimiter,
125                               const std::string& annotation_file) {
126   if (annotation_file.empty()) {
127     return;
128   }
129   std::string ptemplate =
130       "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:";
131   ptemplate.push_back(delimiter);
132   ptemplate.append("annotation_file");
133   ptemplate.push_back(delimiter);
134   ptemplate.append("\")\n");
135   printer->Print(ptemplate.c_str(), "annotation_file", annotation_file);
136 }
137 
PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)138 void PrintEnumVerifierLogic(io::Printer* printer,
139                             const FieldDescriptor* descriptor,
140                             const std::map<std::string, std::string>& variables,
141                             const char* var_name,
142                             const char* terminating_string, bool enforce_lite) {
143   std::string enum_verifier_string =
144       enforce_lite ? StrCat(var_name, ".internalGetVerifier()")
145                    : StrCat(
146                          "new com.google.protobuf.Internal.EnumVerifier() {\n"
147                          "        @java.lang.Override\n"
148                          "        public boolean isInRange(int number) {\n"
149                          "          return ",
150                          var_name,
151                          ".forNumber(number) != null;\n"
152                          "        }\n"
153                          "      }");
154   printer->Print(
155       variables,
156       StrCat(enum_verifier_string, terminating_string).c_str());
157 }
158 
UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)159 std::string UnderscoresToCamelCase(const std::string& input,
160                                    bool cap_next_letter) {
161   GOOGLE_CHECK(!input.empty());
162   std::string result;
163   // Note:  I distrust ctype.h due to locales.
164   for (int i = 0; i < input.size(); i++) {
165     if ('a' <= input[i] && input[i] <= 'z') {
166       if (cap_next_letter) {
167         result += input[i] + ('A' - 'a');
168       } else {
169         result += input[i];
170       }
171       cap_next_letter = false;
172     } else if ('A' <= input[i] && input[i] <= 'Z') {
173       if (i == 0 && !cap_next_letter) {
174         // Force first letter to lower-case unless explicitly told to
175         // capitalize it.
176         result += input[i] + ('a' - 'A');
177       } else {
178         // Capital letters after the first are left as-is.
179         result += input[i];
180       }
181       cap_next_letter = false;
182     } else if ('0' <= input[i] && input[i] <= '9') {
183       result += input[i];
184       cap_next_letter = true;
185     } else {
186       cap_next_letter = true;
187     }
188   }
189   // Add a trailing "_" if the name should be altered.
190   if (input[input.size() - 1] == '#') {
191     result += '_';
192   }
193   return result;
194 }
195 
ToCamelCase(const std::string & input,bool lower_first)196 std::string ToCamelCase(const std::string& input, bool lower_first) {
197   bool capitalize_next = !lower_first;
198   std::string result;
199   result.reserve(input.size());
200 
201   for (char i : input) {
202     if (i == '_') {
203       capitalize_next = true;
204     } else if (capitalize_next) {
205       result.push_back(ToUpperCh(i));
206       capitalize_next = false;
207     } else {
208       result.push_back(i);
209     }
210   }
211 
212   // Lower-case the first letter.
213   if (lower_first && !result.empty()) {
214     result[0] = ToLowerCh(result[0]);
215   }
216 
217   return result;
218 }
219 
// Maps ASCII 'a'..'z' to 'A'..'Z'; any other character is returned unchanged.
char ToUpperCh(char ch) {
  if (ch < 'a' || ch > 'z') return ch;
  return static_cast<char>(ch - 'a' + 'A');
}
223 
// Maps ASCII 'A'..'Z' to 'a'..'z'; any other character is returned unchanged.
char ToLowerCh(char ch) {
  if (ch < 'A' || ch > 'Z') return ch;
  return static_cast<char>(ch - 'A' + 'a');
}
227 
UnderscoresToCamelCase(const FieldDescriptor * field)228 std::string UnderscoresToCamelCase(const FieldDescriptor* field) {
229   return UnderscoresToCamelCase(FieldName(field), false);
230 }
231 
UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)232 std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) {
233   return UnderscoresToCamelCase(FieldName(field), true);
234 }
235 
CapitalizedFieldName(const FieldDescriptor * field)236 std::string CapitalizedFieldName(const FieldDescriptor* field) {
237   return UnderscoresToCapitalizedCamelCase(field);
238 }
239 
UnderscoresToCamelCase(const MethodDescriptor * method)240 std::string UnderscoresToCamelCase(const MethodDescriptor* method) {
241   return UnderscoresToCamelCase(method->name(), false);
242 }
243 
UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)244 std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) {
245   std::string name = UnderscoresToCamelCase(field);
246   if (kReservedNames->find(name) != kReservedNames->end()) {
247     return name + "_";
248   }
249   return name;
250 }
251 
// Returns true if `field_name` is one of the names that should be avoided as
// a field name in Kotlin.  All Kotlin hard keywords are in the list.
bool IsForbiddenKotlin(const std::string& field_name) {
  // Function-local static: the set is built once, on first use, instead of
  // being allocated anew on every call.  It is intentionally never deleted.
  static const std::unordered_set<std::string>* const kKotlinForbiddenNames =
      new std::unordered_set<std::string>({
          "as",      "as?",       "break",  "class", "continue", "do",
          "else",    "false",     "for",    "fun",   "if",       "in",
          "!in",     "interface", "is",     "!is",   "null",     "object",
          "package", "return",    "super",  "this",  "throw",    "true",
          "try",     "typealias", "typeof", "val",   "var",      "when",
          "while",
      });
  return kKotlinForbiddenNames->find(field_name) !=
         kKotlinForbiddenNames->end();
}
267 
UniqueFileScopeIdentifier(const Descriptor * descriptor)268 std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) {
269   return "static_" + StringReplace(descriptor->full_name(), ".", "_", true);
270 }
271 
CamelCaseFieldName(const FieldDescriptor * field)272 std::string CamelCaseFieldName(const FieldDescriptor* field) {
273   std::string fieldName = UnderscoresToCamelCase(field);
274   if ('0' <= fieldName[0] && fieldName[0] <= '9') {
275     return '_' + fieldName;
276   }
277   return fieldName;
278 }
279 
FileClassName(const FileDescriptor * file,bool immutable)280 std::string FileClassName(const FileDescriptor* file, bool immutable) {
281   ClassNameResolver name_resolver;
282   return name_resolver.GetFileClassName(file, immutable);
283 }
284 
FileJavaPackage(const FileDescriptor * file,bool immutable)285 std::string FileJavaPackage(const FileDescriptor* file, bool immutable) {
286   std::string result;
287 
288   if (file->options().has_java_package()) {
289     result = file->options().java_package();
290   } else {
291     result = kDefaultPackage;
292     if (!file->package().empty()) {
293       if (!result.empty()) result += '.';
294       result += file->package();
295     }
296   }
297 
298   return result;
299 }
300 
FileJavaPackage(const FileDescriptor * file)301 std::string FileJavaPackage(const FileDescriptor* file) {
302   return FileJavaPackage(file, true /* immutable */);
303 }
304 
JavaPackageToDir(std::string package_name)305 std::string JavaPackageToDir(std::string package_name) {
306   std::string package_dir = StringReplace(package_name, ".", "/", true);
307   if (!package_dir.empty()) package_dir += "/";
308   return package_dir;
309 }
310 
ClassName(const Descriptor * descriptor)311 std::string ClassName(const Descriptor* descriptor) {
312   ClassNameResolver name_resolver;
313   return name_resolver.GetClassName(descriptor, true);
314 }
315 
ClassName(const EnumDescriptor * descriptor)316 std::string ClassName(const EnumDescriptor* descriptor) {
317   ClassNameResolver name_resolver;
318   return name_resolver.GetClassName(descriptor, true);
319 }
320 
ClassName(const ServiceDescriptor * descriptor)321 std::string ClassName(const ServiceDescriptor* descriptor) {
322   ClassNameResolver name_resolver;
323   return name_resolver.GetClassName(descriptor, true);
324 }
325 
ClassName(const FileDescriptor * descriptor)326 std::string ClassName(const FileDescriptor* descriptor) {
327   ClassNameResolver name_resolver;
328   return name_resolver.GetClassName(descriptor, true);
329 }
330 
331 
ExtraMessageInterfaces(const Descriptor * descriptor)332 std::string ExtraMessageInterfaces(const Descriptor* descriptor) {
333   std::string interfaces = "// @@protoc_insertion_point(message_implements:" +
334                            descriptor->full_name() + ")";
335   return interfaces;
336 }
337 
338 
ExtraBuilderInterfaces(const Descriptor * descriptor)339 std::string ExtraBuilderInterfaces(const Descriptor* descriptor) {
340   std::string interfaces = "// @@protoc_insertion_point(builder_implements:" +
341                            descriptor->full_name() + ")";
342   return interfaces;
343 }
344 
ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)345 std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) {
346   std::string interfaces = "// @@protoc_insertion_point(interface_extends:" +
347                            descriptor->full_name() + ")";
348   return interfaces;
349 }
350 
FieldConstantName(const FieldDescriptor * field)351 std::string FieldConstantName(const FieldDescriptor* field) {
352   std::string name = field->name() + "_FIELD_NUMBER";
353   ToUpper(&name);
354   return name;
355 }
356 
// Returns the declared type of `field`, i.e. field->type().
FieldDescriptor::Type GetType(const FieldDescriptor* field) {
  const FieldDescriptor::Type declared_type = field->type();
  return declared_type;
}
360 
GetJavaType(const FieldDescriptor * field)361 JavaType GetJavaType(const FieldDescriptor* field) {
362   switch (GetType(field)) {
363     case FieldDescriptor::TYPE_INT32:
364     case FieldDescriptor::TYPE_UINT32:
365     case FieldDescriptor::TYPE_SINT32:
366     case FieldDescriptor::TYPE_FIXED32:
367     case FieldDescriptor::TYPE_SFIXED32:
368       return JAVATYPE_INT;
369 
370     case FieldDescriptor::TYPE_INT64:
371     case FieldDescriptor::TYPE_UINT64:
372     case FieldDescriptor::TYPE_SINT64:
373     case FieldDescriptor::TYPE_FIXED64:
374     case FieldDescriptor::TYPE_SFIXED64:
375       return JAVATYPE_LONG;
376 
377     case FieldDescriptor::TYPE_FLOAT:
378       return JAVATYPE_FLOAT;
379 
380     case FieldDescriptor::TYPE_DOUBLE:
381       return JAVATYPE_DOUBLE;
382 
383     case FieldDescriptor::TYPE_BOOL:
384       return JAVATYPE_BOOLEAN;
385 
386     case FieldDescriptor::TYPE_STRING:
387       return JAVATYPE_STRING;
388 
389     case FieldDescriptor::TYPE_BYTES:
390       return JAVATYPE_BYTES;
391 
392     case FieldDescriptor::TYPE_ENUM:
393       return JAVATYPE_ENUM;
394 
395     case FieldDescriptor::TYPE_GROUP:
396     case FieldDescriptor::TYPE_MESSAGE:
397       return JAVATYPE_MESSAGE;
398 
399       // No default because we want the compiler to complain if any new
400       // types are added.
401   }
402 
403   GOOGLE_LOG(FATAL) << "Can't get here.";
404   return JAVATYPE_INT;
405 }
406 
PrimitiveTypeName(JavaType type)407 const char* PrimitiveTypeName(JavaType type) {
408   switch (type) {
409     case JAVATYPE_INT:
410       return "int";
411     case JAVATYPE_LONG:
412       return "long";
413     case JAVATYPE_FLOAT:
414       return "float";
415     case JAVATYPE_DOUBLE:
416       return "double";
417     case JAVATYPE_BOOLEAN:
418       return "boolean";
419     case JAVATYPE_STRING:
420       return "java.lang.String";
421     case JAVATYPE_BYTES:
422       return "com.google.protobuf.ByteString";
423     case JAVATYPE_ENUM:
424       return NULL;
425     case JAVATYPE_MESSAGE:
426       return NULL;
427 
428       // No default because we want the compiler to complain if any new
429       // JavaTypes are added.
430   }
431 
432   GOOGLE_LOG(FATAL) << "Can't get here.";
433   return NULL;
434 }
435 
PrimitiveTypeName(const FieldDescriptor * descriptor)436 const char* PrimitiveTypeName(const FieldDescriptor* descriptor) {
437   return PrimitiveTypeName(GetJavaType(descriptor));
438 }
439 
BoxedPrimitiveTypeName(JavaType type)440 const char* BoxedPrimitiveTypeName(JavaType type) {
441   switch (type) {
442     case JAVATYPE_INT:
443       return "java.lang.Integer";
444     case JAVATYPE_LONG:
445       return "java.lang.Long";
446     case JAVATYPE_FLOAT:
447       return "java.lang.Float";
448     case JAVATYPE_DOUBLE:
449       return "java.lang.Double";
450     case JAVATYPE_BOOLEAN:
451       return "java.lang.Boolean";
452     case JAVATYPE_STRING:
453       return "java.lang.String";
454     case JAVATYPE_BYTES:
455       return "com.google.protobuf.ByteString";
456     case JAVATYPE_ENUM:
457       return NULL;
458     case JAVATYPE_MESSAGE:
459       return NULL;
460 
461       // No default because we want the compiler to complain if any new
462       // JavaTypes are added.
463   }
464 
465   GOOGLE_LOG(FATAL) << "Can't get here.";
466   return NULL;
467 }
468 
BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)469 const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) {
470   return BoxedPrimitiveTypeName(GetJavaType(descriptor));
471 }
472 
KotlinTypeName(JavaType type)473 const char* KotlinTypeName(JavaType type) {
474   switch (type) {
475     case JAVATYPE_INT:
476       return "kotlin.Int";
477     case JAVATYPE_LONG:
478       return "kotlin.Long";
479     case JAVATYPE_FLOAT:
480       return "kotlin.Float";
481     case JAVATYPE_DOUBLE:
482       return "kotlin.Double";
483     case JAVATYPE_BOOLEAN:
484       return "kotlin.Boolean";
485     case JAVATYPE_STRING:
486       return "kotlin.String";
487     case JAVATYPE_BYTES:
488       return "com.google.protobuf.ByteString";
489     case JAVATYPE_ENUM:
490       return NULL;
491     case JAVATYPE_MESSAGE:
492       return NULL;
493 
494       // No default because we want the compiler to complain if any new
495       // JavaTypes are added.
496   }
497 
498   GOOGLE_LOG(FATAL) << "Can't get here.";
499   return NULL;
500 }
501 
GetOneofStoredType(const FieldDescriptor * field)502 std::string GetOneofStoredType(const FieldDescriptor* field) {
503   const JavaType javaType = GetJavaType(field);
504   switch (javaType) {
505     case JAVATYPE_ENUM:
506       return "java.lang.Integer";
507     case JAVATYPE_MESSAGE:
508       return ClassName(field->message_type());
509     default:
510       return BoxedPrimitiveTypeName(javaType);
511   }
512 }
513 
FieldTypeName(FieldDescriptor::Type field_type)514 const char* FieldTypeName(FieldDescriptor::Type field_type) {
515   switch (field_type) {
516     case FieldDescriptor::TYPE_INT32:
517       return "INT32";
518     case FieldDescriptor::TYPE_UINT32:
519       return "UINT32";
520     case FieldDescriptor::TYPE_SINT32:
521       return "SINT32";
522     case FieldDescriptor::TYPE_FIXED32:
523       return "FIXED32";
524     case FieldDescriptor::TYPE_SFIXED32:
525       return "SFIXED32";
526     case FieldDescriptor::TYPE_INT64:
527       return "INT64";
528     case FieldDescriptor::TYPE_UINT64:
529       return "UINT64";
530     case FieldDescriptor::TYPE_SINT64:
531       return "SINT64";
532     case FieldDescriptor::TYPE_FIXED64:
533       return "FIXED64";
534     case FieldDescriptor::TYPE_SFIXED64:
535       return "SFIXED64";
536     case FieldDescriptor::TYPE_FLOAT:
537       return "FLOAT";
538     case FieldDescriptor::TYPE_DOUBLE:
539       return "DOUBLE";
540     case FieldDescriptor::TYPE_BOOL:
541       return "BOOL";
542     case FieldDescriptor::TYPE_STRING:
543       return "STRING";
544     case FieldDescriptor::TYPE_BYTES:
545       return "BYTES";
546     case FieldDescriptor::TYPE_ENUM:
547       return "ENUM";
548     case FieldDescriptor::TYPE_GROUP:
549       return "GROUP";
550     case FieldDescriptor::TYPE_MESSAGE:
551       return "MESSAGE";
552 
553       // No default because we want the compiler to complain if any new
554       // types are added.
555   }
556 
557   GOOGLE_LOG(FATAL) << "Can't get here.";
558   return NULL;
559 }
560 
// Returns true if every byte of `text` has its high bit (0x80) clear, i.e. the
// string is pure 7-bit ASCII.  An empty string yields true.
bool AllAscii(const std::string& text) {
  // Range-for avoids comparing a signed index against text.size().
  for (const char c : text) {
    if ((c & 0x80) != 0) {
      return false;
    }
  }
  return true;
}
569 
DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)570 std::string DefaultValue(const FieldDescriptor* field, bool immutable,
571                          ClassNameResolver* name_resolver) {
572   // Switch on CppType since we need to know which default_value_* method
573   // of FieldDescriptor to call.
574   switch (field->cpp_type()) {
575     case FieldDescriptor::CPPTYPE_INT32:
576       return StrCat(field->default_value_int32());
577     case FieldDescriptor::CPPTYPE_UINT32:
578       // Need to print as a signed int since Java has no unsigned.
579       return StrCat(static_cast<int32_t>(field->default_value_uint32()));
580     case FieldDescriptor::CPPTYPE_INT64:
581       return StrCat(field->default_value_int64()) + "L";
582     case FieldDescriptor::CPPTYPE_UINT64:
583       return StrCat(static_cast<int64_t>(field->default_value_uint64())) +
584              "L";
585     case FieldDescriptor::CPPTYPE_DOUBLE: {
586       double value = field->default_value_double();
587       if (value == std::numeric_limits<double>::infinity()) {
588         return "Double.POSITIVE_INFINITY";
589       } else if (value == -std::numeric_limits<double>::infinity()) {
590         return "Double.NEGATIVE_INFINITY";
591       } else if (value != value) {
592         return "Double.NaN";
593       } else {
594         return SimpleDtoa(value) + "D";
595       }
596     }
597     case FieldDescriptor::CPPTYPE_FLOAT: {
598       float value = field->default_value_float();
599       if (value == std::numeric_limits<float>::infinity()) {
600         return "Float.POSITIVE_INFINITY";
601       } else if (value == -std::numeric_limits<float>::infinity()) {
602         return "Float.NEGATIVE_INFINITY";
603       } else if (value != value) {
604         return "Float.NaN";
605       } else {
606         return SimpleFtoa(value) + "F";
607       }
608     }
609     case FieldDescriptor::CPPTYPE_BOOL:
610       return field->default_value_bool() ? "true" : "false";
611     case FieldDescriptor::CPPTYPE_STRING:
612       if (GetType(field) == FieldDescriptor::TYPE_BYTES) {
613         if (field->has_default_value()) {
614           // See comments in Internal.java for gory details.
615           return strings::Substitute(
616               "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")",
617               CEscape(field->default_value_string()));
618         } else {
619           return "com.google.protobuf.ByteString.EMPTY";
620         }
621       } else {
622         if (AllAscii(field->default_value_string())) {
623           // All chars are ASCII.  In this case CEscape() works fine.
624           return "\"" + CEscape(field->default_value_string()) + "\"";
625         } else {
626           // See comments in Internal.java for gory details.
627           return strings::Substitute(
628               "com.google.protobuf.Internal.stringDefaultValue(\"$0\")",
629               CEscape(field->default_value_string()));
630         }
631       }
632 
633     case FieldDescriptor::CPPTYPE_ENUM:
634       return name_resolver->GetClassName(field->enum_type(), immutable) + "." +
635              field->default_value_enum()->name();
636 
637     case FieldDescriptor::CPPTYPE_MESSAGE:
638       return name_resolver->GetClassName(field->message_type(), immutable) +
639              ".getDefaultInstance()";
640 
641       // No default because we want the compiler to complain if any new
642       // types are added.
643   }
644 
645   GOOGLE_LOG(FATAL) << "Can't get here.";
646   return "";
647 }
648 
IsDefaultValueJavaDefault(const FieldDescriptor * field)649 bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
650   // Switch on CppType since we need to know which default_value_* method
651   // of FieldDescriptor to call.
652   switch (field->cpp_type()) {
653     case FieldDescriptor::CPPTYPE_INT32:
654       return field->default_value_int32() == 0;
655     case FieldDescriptor::CPPTYPE_UINT32:
656       return field->default_value_uint32() == 0;
657     case FieldDescriptor::CPPTYPE_INT64:
658       return field->default_value_int64() == 0L;
659     case FieldDescriptor::CPPTYPE_UINT64:
660       return field->default_value_uint64() == 0L;
661     case FieldDescriptor::CPPTYPE_DOUBLE:
662       return field->default_value_double() == 0.0;
663     case FieldDescriptor::CPPTYPE_FLOAT:
664       return field->default_value_float() == 0.0;
665     case FieldDescriptor::CPPTYPE_BOOL:
666       return field->default_value_bool() == false;
667     case FieldDescriptor::CPPTYPE_ENUM:
668       return field->default_value_enum()->number() == 0;
669     case FieldDescriptor::CPPTYPE_STRING:
670     case FieldDescriptor::CPPTYPE_MESSAGE:
671       return false;
672 
673       // No default because we want the compiler to complain if any new
674       // types are added.
675   }
676 
677   GOOGLE_LOG(FATAL) << "Can't get here.";
678   return false;
679 }
680 
IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)681 bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) {
682   return GetJavaType(field) == JAVATYPE_BYTES &&
683          field->default_value_string() != "";
684 }
685 
// Hex literals, as text, for the 32 single-bit masks of a 32-bit word: entry
// i is the mask with only bit i set.  The Generate*Bit helpers below index
// this table with `bitIndex % 32`.
const char* bit_masks[] = {
    "0x00000001", "0x00000002", "0x00000004", "0x00000008",
    "0x00000010", "0x00000020", "0x00000040", "0x00000080",

    "0x00000100", "0x00000200", "0x00000400", "0x00000800",
    "0x00001000", "0x00002000", "0x00004000", "0x00008000",

    "0x00010000", "0x00020000", "0x00040000", "0x00080000",
    "0x00100000", "0x00200000", "0x00400000", "0x00800000",

    "0x01000000", "0x02000000", "0x04000000", "0x08000000",
    "0x10000000", "0x20000000", "0x40000000", "0x80000000",
};
699 
GetBitFieldName(int index)700 std::string GetBitFieldName(int index) {
701   std::string varName = "bitField";
702   varName += StrCat(index);
703   varName += "_";
704   return varName;
705 }
706 
GetBitFieldNameForBit(int bitIndex)707 std::string GetBitFieldNameForBit(int bitIndex) {
708   return GetBitFieldName(bitIndex / 32);
709 }
710 
711 namespace {
712 
GenerateGetBitInternal(const std::string & prefix,int bitIndex)713 std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) {
714   std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
715   int bitInVarIndex = bitIndex % 32;
716 
717   std::string mask = bit_masks[bitInVarIndex];
718   std::string result = "((" + varName + " & " + mask + ") != 0)";
719   return result;
720 }
721 
GenerateSetBitInternal(const std::string & prefix,int bitIndex)722 std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) {
723   std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
724   int bitInVarIndex = bitIndex % 32;
725 
726   std::string mask = bit_masks[bitInVarIndex];
727   std::string result = varName + " |= " + mask;
728   return result;
729 }
730 
731 }  // namespace
732 
GenerateGetBit(int bitIndex)733 std::string GenerateGetBit(int bitIndex) {
734   return GenerateGetBitInternal("", bitIndex);
735 }
736 
GenerateSetBit(int bitIndex)737 std::string GenerateSetBit(int bitIndex) {
738   return GenerateSetBitInternal("", bitIndex);
739 }
740 
GenerateClearBit(int bitIndex)741 std::string GenerateClearBit(int bitIndex) {
742   std::string varName = GetBitFieldNameForBit(bitIndex);
743   int bitInVarIndex = bitIndex % 32;
744 
745   std::string mask = bit_masks[bitInVarIndex];
746   std::string result = varName + " = (" + varName + " & ~" + mask + ")";
747   return result;
748 }
749 
GenerateGetBitFromLocal(int bitIndex)750 std::string GenerateGetBitFromLocal(int bitIndex) {
751   return GenerateGetBitInternal("from_", bitIndex);
752 }
753 
GenerateSetBitToLocal(int bitIndex)754 std::string GenerateSetBitToLocal(int bitIndex) {
755   return GenerateSetBitInternal("to_", bitIndex);
756 }
757 
GenerateGetBitMutableLocal(int bitIndex)758 std::string GenerateGetBitMutableLocal(int bitIndex) {
759   return GenerateGetBitInternal("mutable_", bitIndex);
760 }
761 
GenerateSetBitMutableLocal(int bitIndex)762 std::string GenerateSetBitMutableLocal(int bitIndex) {
763   return GenerateSetBitInternal("mutable_", bitIndex);
764 }
765 
IsReferenceType(JavaType type)766 bool IsReferenceType(JavaType type) {
767   switch (type) {
768     case JAVATYPE_INT:
769       return false;
770     case JAVATYPE_LONG:
771       return false;
772     case JAVATYPE_FLOAT:
773       return false;
774     case JAVATYPE_DOUBLE:
775       return false;
776     case JAVATYPE_BOOLEAN:
777       return false;
778     case JAVATYPE_STRING:
779       return true;
780     case JAVATYPE_BYTES:
781       return true;
782     case JAVATYPE_ENUM:
783       return true;
784     case JAVATYPE_MESSAGE:
785       return true;
786 
787       // No default because we want the compiler to complain if any new
788       // JavaTypes are added.
789   }
790 
791   GOOGLE_LOG(FATAL) << "Can't get here.";
792   return false;
793 }
794 
GetCapitalizedType(const FieldDescriptor * field,bool immutable)795 const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) {
796   switch (GetType(field)) {
797     case FieldDescriptor::TYPE_INT32:
798       return "Int32";
799     case FieldDescriptor::TYPE_UINT32:
800       return "UInt32";
801     case FieldDescriptor::TYPE_SINT32:
802       return "SInt32";
803     case FieldDescriptor::TYPE_FIXED32:
804       return "Fixed32";
805     case FieldDescriptor::TYPE_SFIXED32:
806       return "SFixed32";
807     case FieldDescriptor::TYPE_INT64:
808       return "Int64";
809     case FieldDescriptor::TYPE_UINT64:
810       return "UInt64";
811     case FieldDescriptor::TYPE_SINT64:
812       return "SInt64";
813     case FieldDescriptor::TYPE_FIXED64:
814       return "Fixed64";
815     case FieldDescriptor::TYPE_SFIXED64:
816       return "SFixed64";
817     case FieldDescriptor::TYPE_FLOAT:
818       return "Float";
819     case FieldDescriptor::TYPE_DOUBLE:
820       return "Double";
821     case FieldDescriptor::TYPE_BOOL:
822       return "Bool";
823     case FieldDescriptor::TYPE_STRING:
824       return "String";
825     case FieldDescriptor::TYPE_BYTES: {
826       return "Bytes";
827     }
828     case FieldDescriptor::TYPE_ENUM:
829       return "Enum";
830     case FieldDescriptor::TYPE_GROUP:
831       return "Group";
832     case FieldDescriptor::TYPE_MESSAGE:
833       return "Message";
834 
835       // No default because we want the compiler to complain if any new
836       // types are added.
837   }
838 
839   GOOGLE_LOG(FATAL) << "Can't get here.";
840   return NULL;
841 }
842 
843 // For encodings with fixed sizes, returns that size in bytes.  Otherwise
844 // returns -1.
FixedSize(FieldDescriptor::Type type)845 int FixedSize(FieldDescriptor::Type type) {
846   switch (type) {
847     case FieldDescriptor::TYPE_INT32:
848       return -1;
849     case FieldDescriptor::TYPE_INT64:
850       return -1;
851     case FieldDescriptor::TYPE_UINT32:
852       return -1;
853     case FieldDescriptor::TYPE_UINT64:
854       return -1;
855     case FieldDescriptor::TYPE_SINT32:
856       return -1;
857     case FieldDescriptor::TYPE_SINT64:
858       return -1;
859     case FieldDescriptor::TYPE_FIXED32:
860       return WireFormatLite::kFixed32Size;
861     case FieldDescriptor::TYPE_FIXED64:
862       return WireFormatLite::kFixed64Size;
863     case FieldDescriptor::TYPE_SFIXED32:
864       return WireFormatLite::kSFixed32Size;
865     case FieldDescriptor::TYPE_SFIXED64:
866       return WireFormatLite::kSFixed64Size;
867     case FieldDescriptor::TYPE_FLOAT:
868       return WireFormatLite::kFloatSize;
869     case FieldDescriptor::TYPE_DOUBLE:
870       return WireFormatLite::kDoubleSize;
871 
872     case FieldDescriptor::TYPE_BOOL:
873       return WireFormatLite::kBoolSize;
874     case FieldDescriptor::TYPE_ENUM:
875       return -1;
876 
877     case FieldDescriptor::TYPE_STRING:
878       return -1;
879     case FieldDescriptor::TYPE_BYTES:
880       return -1;
881     case FieldDescriptor::TYPE_GROUP:
882       return -1;
883     case FieldDescriptor::TYPE_MESSAGE:
884       return -1;
885 
886       // No default because we want the compiler to complain if any new
887       // types are added.
888   }
889   GOOGLE_LOG(FATAL) << "Can't get here.";
890   return -1;
891 }
892 
893 // Sort the fields of the given Descriptor by number into a new[]'d array
894 // and return it. The caller should delete the returned array.
SortFieldsByNumber(const Descriptor * descriptor)895 const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) {
896   const FieldDescriptor** fields =
897       new const FieldDescriptor*[descriptor->field_count()];
898   for (int i = 0; i < descriptor->field_count(); i++) {
899     fields[i] = descriptor->field(i);
900   }
901   std::sort(fields, fields + descriptor->field_count(),
902             FieldOrderingByNumber());
903   return fields;
904 }
905 
906 // Returns true if the message type has any required fields.  If it doesn't,
907 // we can optimize out calls to its isInitialized() method.
908 //
909 // already_seen is used to avoid checking the same type multiple times
910 // (and also to protect against recursion).
HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)911 bool HasRequiredFields(const Descriptor* type,
912                        std::unordered_set<const Descriptor*>* already_seen) {
913   if (already_seen->count(type) > 0) {
914     // The type is already in cache.  This means that either:
915     // a. The type has no required fields.
916     // b. We are in the midst of checking if the type has required fields,
917     //    somewhere up the stack.  In this case, we know that if the type
918     //    has any required fields, they'll be found when we return to it,
919     //    and the whole call to HasRequiredFields() will return true.
920     //    Therefore, we don't have to check if this type has required fields
921     //    here.
922     return false;
923   }
924   already_seen->insert(type);
925 
926   // If the type has extensions, an extension with message type could contain
927   // required fields, so we have to be conservative and assume such an
928   // extension exists.
929   if (type->extension_range_count() > 0) return true;
930 
931   for (int i = 0; i < type->field_count(); i++) {
932     const FieldDescriptor* field = type->field(i);
933     if (field->is_required()) {
934       return true;
935     }
936     if (GetJavaType(field) == JAVATYPE_MESSAGE) {
937       if (HasRequiredFields(field->message_type(), already_seen)) {
938         return true;
939       }
940     }
941   }
942 
943   return false;
944 }
945 
// Convenience overload: runs the two-argument HasRequiredFields() on `type`
// with a fresh, empty already_seen set and returns its result.
bool HasRequiredFields(const Descriptor* type) {
  std::unordered_set<const Descriptor*> already_seen;
  return HasRequiredFields(type, &already_seen);
}
950 
HasRepeatedFields(const Descriptor * descriptor)951 bool HasRepeatedFields(const Descriptor* descriptor) {
952   for (int i = 0; i < descriptor->field_count(); ++i) {
953     const FieldDescriptor* field = descriptor->field(i);
954     if (field->is_repeated()) {
955       return true;
956     }
957   }
958   return false;
959 }
960 
// Appends to `output` the UTF-16 character sequence that encodes an unsigned
// 32-bit value.
//
// A value in [0x0000, 0xD7FF] becomes a single character with the same
// numeric value.
//
// A larger value is emitted 13 bits at a time, lowest bits first: each chunk
// is OR-ed with 0xE000, giving a character in [0xE000, 0xFFFF], and the value
// is shifted right by 13 bits.  This repeats until what is left fits in
// [0x0000, 0xD7FF]; that remainder is emitted as the final character.
//
// Only code units in [0x0000, 0xD7FF] and [0xE000, 0xFFFF] are produced, so
// the result never contains surrogates.
void WriteUInt32ToUtf16CharSequence(uint32_t number,
                                    std::vector<uint16_t>* output) {
  // Values below this limit are written verbatim as the terminating char.
  // The range [0xD800, 0xDFFF] is skipped because surrogates have to come in
  // pairs and the encoding is more space-efficient without them.
  const uint32_t kTerminatorLimit = 0xD800;
  // Continuation chars live in [0xE000, 0xFFFF], i.e. 13 payload bits each.
  const uint32_t kContinuationTag = 0xE000;
  const uint32_t kPayloadMask = 0x1FFF;
  const int kPayloadBits = 13;

  uint32_t remaining = number;
  for (; remaining >= kTerminatorLimit; remaining >>= kPayloadBits) {
    const uint32_t chunk = remaining & kPayloadMask;
    output->push_back(static_cast<uint16_t>(kContinuationTag | chunk));
  }

  // What is left is in [0x0000, 0xD7FF]; for small inputs this is the only
  // character written.
  output->push_back(static_cast<uint16_t>(remaining));
}
994 
GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)995 int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) {
996   // j/c/g/protobuf/FieldType.java lists field types in a slightly different
997   // order from FieldDescriptor::Type so we can't do a simple cast.
998   //
999   // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order.
1000   int result = field->type();
1001   if (result == FieldDescriptor::TYPE_GROUP) {
1002     return 17;
1003   } else if (result < FieldDescriptor::TYPE_GROUP) {
1004     return result - 1;
1005   } else {
1006     return result - 2;
1007   }
1008 }
1009 
GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)1010 int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) {
1011   if (field->type() == FieldDescriptor::TYPE_GROUP) {
1012     return 49;
1013   } else {
1014     return GetExperimentalJavaFieldTypeForSingular(field) + 18;
1015   }
1016 }
1017 
GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)1018 int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) {
1019   int result = field->type();
1020   if (result < FieldDescriptor::TYPE_STRING) {
1021     return result + 34;
1022   } else if (result > FieldDescriptor::TYPE_BYTES) {
1023     return result + 30;
1024   } else {
1025     GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed.";
1026     return 0;
1027   }
1028 }
1029 
GetExperimentalJavaFieldType(const FieldDescriptor * field)1030 int GetExperimentalJavaFieldType(const FieldDescriptor* field) {
1031   static const int kMapFieldType = 50;
1032   static const int kOneofFieldTypeOffset = 51;
1033   static const int kRequiredBit = 0x100;
1034   static const int kUtf8CheckBit = 0x200;
1035   static const int kCheckInitialized = 0x400;
1036   static const int kMapWithProto2EnumValue = 0x800;
1037   static const int kHasHasBit = 0x1000;
1038   int extra_bits = field->is_required() ? kRequiredBit : 0;
1039   if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) {
1040     extra_bits |= kUtf8CheckBit;
1041   }
1042   if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE &&
1043                                HasRequiredFields(field->message_type()))) {
1044     extra_bits |= kCheckInitialized;
1045   }
1046   if (HasHasbit(field)) {
1047     extra_bits |= kHasHasBit;
1048   }
1049 
1050   if (field->is_map()) {
1051     if (!SupportUnknownEnumValue(field)) {
1052       const FieldDescriptor* value =
1053           field->message_type()->FindFieldByName("value");
1054       if (GetJavaType(value) == JAVATYPE_ENUM) {
1055         extra_bits |= kMapWithProto2EnumValue;
1056       }
1057     }
1058     return kMapFieldType | extra_bits;
1059   } else if (field->is_packed()) {
1060     return GetExperimentalJavaFieldTypeForPacked(field);
1061   } else if (field->is_repeated()) {
1062     return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits;
1063   } else if (IsRealOneof(field)) {
1064     return (GetExperimentalJavaFieldTypeForSingular(field) +
1065             kOneofFieldTypeOffset) |
1066            extra_bits;
1067   } else {
1068     return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits;
1069   }
1070 }
1071 
1072 // Escape a UTF-16 character to be embedded in a Java string.
EscapeUtf16ToString(uint16_t code,std::string * output)1073 void EscapeUtf16ToString(uint16_t code, std::string* output) {
1074   if (code == '\t') {
1075     output->append("\\t");
1076   } else if (code == '\b') {
1077     output->append("\\b");
1078   } else if (code == '\n') {
1079     output->append("\\n");
1080   } else if (code == '\r') {
1081     output->append("\\r");
1082   } else if (code == '\f') {
1083     output->append("\\f");
1084   } else if (code == '\'') {
1085     output->append("\\'");
1086   } else if (code == '\"') {
1087     output->append("\\\"");
1088   } else if (code == '\\') {
1089     output->append("\\\\");
1090   } else if (code >= 0x20 && code <= 0x7f) {
1091     output->push_back(static_cast<char>(code));
1092   } else {
1093     output->append(StringPrintf("\\u%04x", code));
1094   }
1095 }
1096 
1097 }  // namespace java
1098 }  // namespace compiler
1099 }  // namespace protobuf
1100 }  // namespace google
1101