1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <google/protobuf/compiler/java/java_helpers.h>
36
37 #include <algorithm>
38 #include <cstdint>
39 #include <limits>
40 #include <unordered_set>
41 #include <vector>
42
43 #include <google/protobuf/stubs/stringprintf.h>
44 #include <google/protobuf/compiler/java/java_name_resolver.h>
45 #include <google/protobuf/compiler/java/java_names.h>
46 #include <google/protobuf/descriptor.pb.h>
47 #include <google/protobuf/wire_format.h>
48 #include <google/protobuf/stubs/strutil.h>
49 #include <google/protobuf/stubs/substitute.h>
50 #include <google/protobuf/stubs/hash.h> // for hash<T *>
51
52 namespace google {
53 namespace protobuf {
54 namespace compiler {
55 namespace java {
56
57 using internal::WireFormat;
58 using internal::WireFormatLite;
59
// Separator comment lines: a "// " prefix followed by a rule of '=' (thick)
// or '-' (thin) characters and a trailing newline.
const char kThickSeparator[] =
    "// ===================================================================\n";

const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
64
65 namespace {
66
// Package prefix that FileJavaPackage() starts from when the file does not
// set the java_package option. Declared as a const array (rather than a
// reassignable pointer) so the value can never be changed at runtime.
const char kDefaultPackage[] = "";
68
// Names that should be avoided as field names.
// Using them will cause the compiler to generate accessors whose names are
// colliding with methods defined in base classes.
//
// Both the strings and the array slots are const: the table is read-only.
const char* const kForbiddenWordList[] = {
    // message base class:
    "cached_size",
    "serialized_size",
    // java.lang.Object:
    "class",
};
79
// Set of names treated as reserved by UnderscoresToCamelCaseCheckReserved()
// (the entries match the Java keywords). The set is heap-allocated once and
// is not deleted anywhere in this file.
const std::unordered_set<std::string>* kReservedNames =
    new std::unordered_set<std::string>({
        "abstract",     "assert",       "boolean",   "break",
        "byte",         "case",         "catch",     "char",
        "class",        "const",        "continue",  "default",
        "do",           "double",       "else",      "enum",
        "extends",      "final",        "finally",   "float",
        "for",          "goto",         "if",        "implements",
        "import",       "instanceof",   "int",       "interface",
        "long",         "native",       "new",       "package",
        "private",      "protected",    "public",    "return",
        "short",        "static",       "strictfp",  "super",
        "switch",       "synchronized", "this",      "throw",
        "throws",       "transient",    "try",       "void",
        "volatile",     "while",
    });
93
IsForbidden(const std::string & field_name)94 bool IsForbidden(const std::string& field_name) {
95 for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
96 if (field_name == kForbiddenWordList[i]) {
97 return true;
98 }
99 }
100 return false;
101 }
102
FieldName(const FieldDescriptor * field)103 std::string FieldName(const FieldDescriptor* field) {
104 std::string field_name;
105 // Groups are hacky: The name of the field is just the lower-cased name
106 // of the group type. In Java, though, we would like to retain the original
107 // capitalization of the type name.
108 if (GetType(field) == FieldDescriptor::TYPE_GROUP) {
109 field_name = field->message_type()->name();
110 } else {
111 field_name = field->name();
112 }
113 if (IsForbidden(field_name)) {
114 // Append a trailing "#" to indicate that the name should be decorated to
115 // avoid collision with other names.
116 field_name += "#";
117 }
118 return field_name;
119 }
120
121
122 } // namespace
123
PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)124 void PrintGeneratedAnnotation(io::Printer* printer, char delimiter,
125 const std::string& annotation_file) {
126 if (annotation_file.empty()) {
127 return;
128 }
129 std::string ptemplate =
130 "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:";
131 ptemplate.push_back(delimiter);
132 ptemplate.append("annotation_file");
133 ptemplate.push_back(delimiter);
134 ptemplate.append("\")\n");
135 printer->Print(ptemplate.c_str(), "annotation_file", annotation_file);
136 }
137
PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)138 void PrintEnumVerifierLogic(io::Printer* printer,
139 const FieldDescriptor* descriptor,
140 const std::map<std::string, std::string>& variables,
141 const char* var_name,
142 const char* terminating_string, bool enforce_lite) {
143 std::string enum_verifier_string =
144 enforce_lite ? StrCat(var_name, ".internalGetVerifier()")
145 : StrCat(
146 "new com.google.protobuf.Internal.EnumVerifier() {\n"
147 " @java.lang.Override\n"
148 " public boolean isInRange(int number) {\n"
149 " return ",
150 var_name,
151 ".forNumber(number) != null;\n"
152 " }\n"
153 " }");
154 printer->Print(
155 variables,
156 StrCat(enum_verifier_string, terminating_string).c_str());
157 }
158
UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)159 std::string UnderscoresToCamelCase(const std::string& input,
160 bool cap_next_letter) {
161 GOOGLE_CHECK(!input.empty());
162 std::string result;
163 // Note: I distrust ctype.h due to locales.
164 for (int i = 0; i < input.size(); i++) {
165 if ('a' <= input[i] && input[i] <= 'z') {
166 if (cap_next_letter) {
167 result += input[i] + ('A' - 'a');
168 } else {
169 result += input[i];
170 }
171 cap_next_letter = false;
172 } else if ('A' <= input[i] && input[i] <= 'Z') {
173 if (i == 0 && !cap_next_letter) {
174 // Force first letter to lower-case unless explicitly told to
175 // capitalize it.
176 result += input[i] + ('a' - 'A');
177 } else {
178 // Capital letters after the first are left as-is.
179 result += input[i];
180 }
181 cap_next_letter = false;
182 } else if ('0' <= input[i] && input[i] <= '9') {
183 result += input[i];
184 cap_next_letter = true;
185 } else {
186 cap_next_letter = true;
187 }
188 }
189 // Add a trailing "_" if the name should be altered.
190 if (input[input.size() - 1] == '#') {
191 result += '_';
192 }
193 return result;
194 }
195
ToCamelCase(const std::string & input,bool lower_first)196 std::string ToCamelCase(const std::string& input, bool lower_first) {
197 bool capitalize_next = !lower_first;
198 std::string result;
199 result.reserve(input.size());
200
201 for (char i : input) {
202 if (i == '_') {
203 capitalize_next = true;
204 } else if (capitalize_next) {
205 result.push_back(ToUpperCh(i));
206 capitalize_next = false;
207 } else {
208 result.push_back(i);
209 }
210 }
211
212 // Lower-case the first letter.
213 if (lower_first && !result.empty()) {
214 result[0] = ToLowerCh(result[0]);
215 }
216
217 return result;
218 }
219
// Returns the upper-case form of an ASCII lower-case letter; any other
// character is returned unchanged.
char ToUpperCh(char ch) {
  if (ch < 'a' || ch > 'z') {
    return ch;
  }
  return static_cast<char>(ch - 'a' + 'A');
}
223
// Returns the lower-case form of an ASCII upper-case letter; any other
// character is returned unchanged.
char ToLowerCh(char ch) {
  if (ch < 'A' || ch > 'Z') {
    return ch;
  }
  return static_cast<char>(ch - 'A' + 'a');
}
227
UnderscoresToCamelCase(const FieldDescriptor * field)228 std::string UnderscoresToCamelCase(const FieldDescriptor* field) {
229 return UnderscoresToCamelCase(FieldName(field), false);
230 }
231
UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)232 std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) {
233 return UnderscoresToCamelCase(FieldName(field), true);
234 }
235
CapitalizedFieldName(const FieldDescriptor * field)236 std::string CapitalizedFieldName(const FieldDescriptor* field) {
237 return UnderscoresToCapitalizedCamelCase(field);
238 }
239
UnderscoresToCamelCase(const MethodDescriptor * method)240 std::string UnderscoresToCamelCase(const MethodDescriptor* method) {
241 return UnderscoresToCamelCase(method->name(), false);
242 }
243
UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)244 std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) {
245 std::string name = UnderscoresToCamelCase(field);
246 if (kReservedNames->find(name) != kReservedNames->end()) {
247 return name + "_";
248 }
249 return name;
250 }
251
// Returns true if |field_name| is one of the names that should be avoided as
// field names in Kotlin. All Kotlin hard keywords are in this list.
bool IsForbiddenKotlin(const std::string& field_name) {
  // Built once on first use. Previously the set was allocated with `new` on
  // every call and never freed, leaking one set per call. The set is still
  // held by pointer and never deleted, like kReservedNames in this file.
  static const std::unordered_set<std::string>* const kKotlinForbiddenNames =
      new std::unordered_set<std::string>({
          "as",      "as?",       "break",  "class",  "continue", "do",
          "else",    "false",     "for",    "fun",    "if",       "in",
          "!in",     "interface", "is",     "!is",    "null",     "object",
          "package", "return",    "super",  "this",   "throw",    "true",
          "try",     "typealias", "typeof", "val",    "var",      "when",
          "while",
      });
  return kKotlinForbiddenNames->find(field_name) !=
         kKotlinForbiddenNames->end();
}
267
UniqueFileScopeIdentifier(const Descriptor * descriptor)268 std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) {
269 return "static_" + StringReplace(descriptor->full_name(), ".", "_", true);
270 }
271
CamelCaseFieldName(const FieldDescriptor * field)272 std::string CamelCaseFieldName(const FieldDescriptor* field) {
273 std::string fieldName = UnderscoresToCamelCase(field);
274 if ('0' <= fieldName[0] && fieldName[0] <= '9') {
275 return '_' + fieldName;
276 }
277 return fieldName;
278 }
279
FileClassName(const FileDescriptor * file,bool immutable)280 std::string FileClassName(const FileDescriptor* file, bool immutable) {
281 ClassNameResolver name_resolver;
282 return name_resolver.GetFileClassName(file, immutable);
283 }
284
FileJavaPackage(const FileDescriptor * file,bool immutable)285 std::string FileJavaPackage(const FileDescriptor* file, bool immutable) {
286 std::string result;
287
288 if (file->options().has_java_package()) {
289 result = file->options().java_package();
290 } else {
291 result = kDefaultPackage;
292 if (!file->package().empty()) {
293 if (!result.empty()) result += '.';
294 result += file->package();
295 }
296 }
297
298 return result;
299 }
300
FileJavaPackage(const FileDescriptor * file)301 std::string FileJavaPackage(const FileDescriptor* file) {
302 return FileJavaPackage(file, true /* immutable */);
303 }
304
JavaPackageToDir(std::string package_name)305 std::string JavaPackageToDir(std::string package_name) {
306 std::string package_dir = StringReplace(package_name, ".", "/", true);
307 if (!package_dir.empty()) package_dir += "/";
308 return package_dir;
309 }
310
ClassName(const Descriptor * descriptor)311 std::string ClassName(const Descriptor* descriptor) {
312 ClassNameResolver name_resolver;
313 return name_resolver.GetClassName(descriptor, true);
314 }
315
ClassName(const EnumDescriptor * descriptor)316 std::string ClassName(const EnumDescriptor* descriptor) {
317 ClassNameResolver name_resolver;
318 return name_resolver.GetClassName(descriptor, true);
319 }
320
ClassName(const ServiceDescriptor * descriptor)321 std::string ClassName(const ServiceDescriptor* descriptor) {
322 ClassNameResolver name_resolver;
323 return name_resolver.GetClassName(descriptor, true);
324 }
325
ClassName(const FileDescriptor * descriptor)326 std::string ClassName(const FileDescriptor* descriptor) {
327 ClassNameResolver name_resolver;
328 return name_resolver.GetClassName(descriptor, true);
329 }
330
331
ExtraMessageInterfaces(const Descriptor * descriptor)332 std::string ExtraMessageInterfaces(const Descriptor* descriptor) {
333 std::string interfaces = "// @@protoc_insertion_point(message_implements:" +
334 descriptor->full_name() + ")";
335 return interfaces;
336 }
337
338
ExtraBuilderInterfaces(const Descriptor * descriptor)339 std::string ExtraBuilderInterfaces(const Descriptor* descriptor) {
340 std::string interfaces = "// @@protoc_insertion_point(builder_implements:" +
341 descriptor->full_name() + ")";
342 return interfaces;
343 }
344
ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)345 std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) {
346 std::string interfaces = "// @@protoc_insertion_point(interface_extends:" +
347 descriptor->full_name() + ")";
348 return interfaces;
349 }
350
FieldConstantName(const FieldDescriptor * field)351 std::string FieldConstantName(const FieldDescriptor* field) {
352 std::string name = field->name() + "_FIELD_NUMBER";
353 ToUpper(&name);
354 return name;
355 }
356
// Returns the declared type of |field| (a thin wrapper over field->type()).
FieldDescriptor::Type GetType(const FieldDescriptor* field) {
  const FieldDescriptor::Type declared_type = field->type();
  return declared_type;
}
360
GetJavaType(const FieldDescriptor * field)361 JavaType GetJavaType(const FieldDescriptor* field) {
362 switch (GetType(field)) {
363 case FieldDescriptor::TYPE_INT32:
364 case FieldDescriptor::TYPE_UINT32:
365 case FieldDescriptor::TYPE_SINT32:
366 case FieldDescriptor::TYPE_FIXED32:
367 case FieldDescriptor::TYPE_SFIXED32:
368 return JAVATYPE_INT;
369
370 case FieldDescriptor::TYPE_INT64:
371 case FieldDescriptor::TYPE_UINT64:
372 case FieldDescriptor::TYPE_SINT64:
373 case FieldDescriptor::TYPE_FIXED64:
374 case FieldDescriptor::TYPE_SFIXED64:
375 return JAVATYPE_LONG;
376
377 case FieldDescriptor::TYPE_FLOAT:
378 return JAVATYPE_FLOAT;
379
380 case FieldDescriptor::TYPE_DOUBLE:
381 return JAVATYPE_DOUBLE;
382
383 case FieldDescriptor::TYPE_BOOL:
384 return JAVATYPE_BOOLEAN;
385
386 case FieldDescriptor::TYPE_STRING:
387 return JAVATYPE_STRING;
388
389 case FieldDescriptor::TYPE_BYTES:
390 return JAVATYPE_BYTES;
391
392 case FieldDescriptor::TYPE_ENUM:
393 return JAVATYPE_ENUM;
394
395 case FieldDescriptor::TYPE_GROUP:
396 case FieldDescriptor::TYPE_MESSAGE:
397 return JAVATYPE_MESSAGE;
398
399 // No default because we want the compiler to complain if any new
400 // types are added.
401 }
402
403 GOOGLE_LOG(FATAL) << "Can't get here.";
404 return JAVATYPE_INT;
405 }
406
PrimitiveTypeName(JavaType type)407 const char* PrimitiveTypeName(JavaType type) {
408 switch (type) {
409 case JAVATYPE_INT:
410 return "int";
411 case JAVATYPE_LONG:
412 return "long";
413 case JAVATYPE_FLOAT:
414 return "float";
415 case JAVATYPE_DOUBLE:
416 return "double";
417 case JAVATYPE_BOOLEAN:
418 return "boolean";
419 case JAVATYPE_STRING:
420 return "java.lang.String";
421 case JAVATYPE_BYTES:
422 return "com.google.protobuf.ByteString";
423 case JAVATYPE_ENUM:
424 return NULL;
425 case JAVATYPE_MESSAGE:
426 return NULL;
427
428 // No default because we want the compiler to complain if any new
429 // JavaTypes are added.
430 }
431
432 GOOGLE_LOG(FATAL) << "Can't get here.";
433 return NULL;
434 }
435
PrimitiveTypeName(const FieldDescriptor * descriptor)436 const char* PrimitiveTypeName(const FieldDescriptor* descriptor) {
437 return PrimitiveTypeName(GetJavaType(descriptor));
438 }
439
BoxedPrimitiveTypeName(JavaType type)440 const char* BoxedPrimitiveTypeName(JavaType type) {
441 switch (type) {
442 case JAVATYPE_INT:
443 return "java.lang.Integer";
444 case JAVATYPE_LONG:
445 return "java.lang.Long";
446 case JAVATYPE_FLOAT:
447 return "java.lang.Float";
448 case JAVATYPE_DOUBLE:
449 return "java.lang.Double";
450 case JAVATYPE_BOOLEAN:
451 return "java.lang.Boolean";
452 case JAVATYPE_STRING:
453 return "java.lang.String";
454 case JAVATYPE_BYTES:
455 return "com.google.protobuf.ByteString";
456 case JAVATYPE_ENUM:
457 return NULL;
458 case JAVATYPE_MESSAGE:
459 return NULL;
460
461 // No default because we want the compiler to complain if any new
462 // JavaTypes are added.
463 }
464
465 GOOGLE_LOG(FATAL) << "Can't get here.";
466 return NULL;
467 }
468
BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)469 const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) {
470 return BoxedPrimitiveTypeName(GetJavaType(descriptor));
471 }
472
KotlinTypeName(JavaType type)473 const char* KotlinTypeName(JavaType type) {
474 switch (type) {
475 case JAVATYPE_INT:
476 return "kotlin.Int";
477 case JAVATYPE_LONG:
478 return "kotlin.Long";
479 case JAVATYPE_FLOAT:
480 return "kotlin.Float";
481 case JAVATYPE_DOUBLE:
482 return "kotlin.Double";
483 case JAVATYPE_BOOLEAN:
484 return "kotlin.Boolean";
485 case JAVATYPE_STRING:
486 return "kotlin.String";
487 case JAVATYPE_BYTES:
488 return "com.google.protobuf.ByteString";
489 case JAVATYPE_ENUM:
490 return NULL;
491 case JAVATYPE_MESSAGE:
492 return NULL;
493
494 // No default because we want the compiler to complain if any new
495 // JavaTypes are added.
496 }
497
498 GOOGLE_LOG(FATAL) << "Can't get here.";
499 return NULL;
500 }
501
GetOneofStoredType(const FieldDescriptor * field)502 std::string GetOneofStoredType(const FieldDescriptor* field) {
503 const JavaType javaType = GetJavaType(field);
504 switch (javaType) {
505 case JAVATYPE_ENUM:
506 return "java.lang.Integer";
507 case JAVATYPE_MESSAGE:
508 return ClassName(field->message_type());
509 default:
510 return BoxedPrimitiveTypeName(javaType);
511 }
512 }
513
FieldTypeName(FieldDescriptor::Type field_type)514 const char* FieldTypeName(FieldDescriptor::Type field_type) {
515 switch (field_type) {
516 case FieldDescriptor::TYPE_INT32:
517 return "INT32";
518 case FieldDescriptor::TYPE_UINT32:
519 return "UINT32";
520 case FieldDescriptor::TYPE_SINT32:
521 return "SINT32";
522 case FieldDescriptor::TYPE_FIXED32:
523 return "FIXED32";
524 case FieldDescriptor::TYPE_SFIXED32:
525 return "SFIXED32";
526 case FieldDescriptor::TYPE_INT64:
527 return "INT64";
528 case FieldDescriptor::TYPE_UINT64:
529 return "UINT64";
530 case FieldDescriptor::TYPE_SINT64:
531 return "SINT64";
532 case FieldDescriptor::TYPE_FIXED64:
533 return "FIXED64";
534 case FieldDescriptor::TYPE_SFIXED64:
535 return "SFIXED64";
536 case FieldDescriptor::TYPE_FLOAT:
537 return "FLOAT";
538 case FieldDescriptor::TYPE_DOUBLE:
539 return "DOUBLE";
540 case FieldDescriptor::TYPE_BOOL:
541 return "BOOL";
542 case FieldDescriptor::TYPE_STRING:
543 return "STRING";
544 case FieldDescriptor::TYPE_BYTES:
545 return "BYTES";
546 case FieldDescriptor::TYPE_ENUM:
547 return "ENUM";
548 case FieldDescriptor::TYPE_GROUP:
549 return "GROUP";
550 case FieldDescriptor::TYPE_MESSAGE:
551 return "MESSAGE";
552
553 // No default because we want the compiler to complain if any new
554 // types are added.
555 }
556
557 GOOGLE_LOG(FATAL) << "Can't get here.";
558 return NULL;
559 }
560
// Returns true when no byte of |text| has its high bit (0x80) set, i.e. the
// string is pure 7-bit ASCII. An empty string counts as ASCII.
bool AllAscii(const std::string& text) {
  return std::none_of(text.begin(), text.end(),
                      [](char ch) { return (ch & 0x80) != 0; });
}
569
DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)570 std::string DefaultValue(const FieldDescriptor* field, bool immutable,
571 ClassNameResolver* name_resolver) {
572 // Switch on CppType since we need to know which default_value_* method
573 // of FieldDescriptor to call.
574 switch (field->cpp_type()) {
575 case FieldDescriptor::CPPTYPE_INT32:
576 return StrCat(field->default_value_int32());
577 case FieldDescriptor::CPPTYPE_UINT32:
578 // Need to print as a signed int since Java has no unsigned.
579 return StrCat(static_cast<int32_t>(field->default_value_uint32()));
580 case FieldDescriptor::CPPTYPE_INT64:
581 return StrCat(field->default_value_int64()) + "L";
582 case FieldDescriptor::CPPTYPE_UINT64:
583 return StrCat(static_cast<int64_t>(field->default_value_uint64())) +
584 "L";
585 case FieldDescriptor::CPPTYPE_DOUBLE: {
586 double value = field->default_value_double();
587 if (value == std::numeric_limits<double>::infinity()) {
588 return "Double.POSITIVE_INFINITY";
589 } else if (value == -std::numeric_limits<double>::infinity()) {
590 return "Double.NEGATIVE_INFINITY";
591 } else if (value != value) {
592 return "Double.NaN";
593 } else {
594 return SimpleDtoa(value) + "D";
595 }
596 }
597 case FieldDescriptor::CPPTYPE_FLOAT: {
598 float value = field->default_value_float();
599 if (value == std::numeric_limits<float>::infinity()) {
600 return "Float.POSITIVE_INFINITY";
601 } else if (value == -std::numeric_limits<float>::infinity()) {
602 return "Float.NEGATIVE_INFINITY";
603 } else if (value != value) {
604 return "Float.NaN";
605 } else {
606 return SimpleFtoa(value) + "F";
607 }
608 }
609 case FieldDescriptor::CPPTYPE_BOOL:
610 return field->default_value_bool() ? "true" : "false";
611 case FieldDescriptor::CPPTYPE_STRING:
612 if (GetType(field) == FieldDescriptor::TYPE_BYTES) {
613 if (field->has_default_value()) {
614 // See comments in Internal.java for gory details.
615 return strings::Substitute(
616 "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")",
617 CEscape(field->default_value_string()));
618 } else {
619 return "com.google.protobuf.ByteString.EMPTY";
620 }
621 } else {
622 if (AllAscii(field->default_value_string())) {
623 // All chars are ASCII. In this case CEscape() works fine.
624 return "\"" + CEscape(field->default_value_string()) + "\"";
625 } else {
626 // See comments in Internal.java for gory details.
627 return strings::Substitute(
628 "com.google.protobuf.Internal.stringDefaultValue(\"$0\")",
629 CEscape(field->default_value_string()));
630 }
631 }
632
633 case FieldDescriptor::CPPTYPE_ENUM:
634 return name_resolver->GetClassName(field->enum_type(), immutable) + "." +
635 field->default_value_enum()->name();
636
637 case FieldDescriptor::CPPTYPE_MESSAGE:
638 return name_resolver->GetClassName(field->message_type(), immutable) +
639 ".getDefaultInstance()";
640
641 // No default because we want the compiler to complain if any new
642 // types are added.
643 }
644
645 GOOGLE_LOG(FATAL) << "Can't get here.";
646 return "";
647 }
648
IsDefaultValueJavaDefault(const FieldDescriptor * field)649 bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
650 // Switch on CppType since we need to know which default_value_* method
651 // of FieldDescriptor to call.
652 switch (field->cpp_type()) {
653 case FieldDescriptor::CPPTYPE_INT32:
654 return field->default_value_int32() == 0;
655 case FieldDescriptor::CPPTYPE_UINT32:
656 return field->default_value_uint32() == 0;
657 case FieldDescriptor::CPPTYPE_INT64:
658 return field->default_value_int64() == 0L;
659 case FieldDescriptor::CPPTYPE_UINT64:
660 return field->default_value_uint64() == 0L;
661 case FieldDescriptor::CPPTYPE_DOUBLE:
662 return field->default_value_double() == 0.0;
663 case FieldDescriptor::CPPTYPE_FLOAT:
664 return field->default_value_float() == 0.0;
665 case FieldDescriptor::CPPTYPE_BOOL:
666 return field->default_value_bool() == false;
667 case FieldDescriptor::CPPTYPE_ENUM:
668 return field->default_value_enum()->number() == 0;
669 case FieldDescriptor::CPPTYPE_STRING:
670 case FieldDescriptor::CPPTYPE_MESSAGE:
671 return false;
672
673 // No default because we want the compiler to complain if any new
674 // types are added.
675 }
676
677 GOOGLE_LOG(FATAL) << "Can't get here.";
678 return false;
679 }
680
IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)681 bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) {
682 return GetJavaType(field) == JAVATYPE_BYTES &&
683 field->default_value_string() != "";
684 }
685
// Hex literals for single-bit masks: entry i is the text of (1 << i) as an
// 8-digit hexadecimal number, for i in [0, 31].
const char* bit_masks[] = {
    // Bits 0-7.
    "0x00000001", "0x00000002", "0x00000004", "0x00000008",
    "0x00000010", "0x00000020", "0x00000040", "0x00000080",

    // Bits 8-15.
    "0x00000100", "0x00000200", "0x00000400", "0x00000800",
    "0x00001000", "0x00002000", "0x00004000", "0x00008000",

    // Bits 16-23.
    "0x00010000", "0x00020000", "0x00040000", "0x00080000",
    "0x00100000", "0x00200000", "0x00400000", "0x00800000",

    // Bits 24-31.
    "0x01000000", "0x02000000", "0x04000000", "0x08000000",
    "0x10000000", "0x20000000", "0x40000000", "0x80000000",
};
699
GetBitFieldName(int index)700 std::string GetBitFieldName(int index) {
701 std::string varName = "bitField";
702 varName += StrCat(index);
703 varName += "_";
704 return varName;
705 }
706
GetBitFieldNameForBit(int bitIndex)707 std::string GetBitFieldNameForBit(int bitIndex) {
708 return GetBitFieldName(bitIndex / 32);
709 }
710
711 namespace {
712
GenerateGetBitInternal(const std::string & prefix,int bitIndex)713 std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) {
714 std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
715 int bitInVarIndex = bitIndex % 32;
716
717 std::string mask = bit_masks[bitInVarIndex];
718 std::string result = "((" + varName + " & " + mask + ") != 0)";
719 return result;
720 }
721
GenerateSetBitInternal(const std::string & prefix,int bitIndex)722 std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) {
723 std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
724 int bitInVarIndex = bitIndex % 32;
725
726 std::string mask = bit_masks[bitInVarIndex];
727 std::string result = varName + " |= " + mask;
728 return result;
729 }
730
731 } // namespace
732
GenerateGetBit(int bitIndex)733 std::string GenerateGetBit(int bitIndex) {
734 return GenerateGetBitInternal("", bitIndex);
735 }
736
GenerateSetBit(int bitIndex)737 std::string GenerateSetBit(int bitIndex) {
738 return GenerateSetBitInternal("", bitIndex);
739 }
740
GenerateClearBit(int bitIndex)741 std::string GenerateClearBit(int bitIndex) {
742 std::string varName = GetBitFieldNameForBit(bitIndex);
743 int bitInVarIndex = bitIndex % 32;
744
745 std::string mask = bit_masks[bitInVarIndex];
746 std::string result = varName + " = (" + varName + " & ~" + mask + ")";
747 return result;
748 }
749
GenerateGetBitFromLocal(int bitIndex)750 std::string GenerateGetBitFromLocal(int bitIndex) {
751 return GenerateGetBitInternal("from_", bitIndex);
752 }
753
GenerateSetBitToLocal(int bitIndex)754 std::string GenerateSetBitToLocal(int bitIndex) {
755 return GenerateSetBitInternal("to_", bitIndex);
756 }
757
GenerateGetBitMutableLocal(int bitIndex)758 std::string GenerateGetBitMutableLocal(int bitIndex) {
759 return GenerateGetBitInternal("mutable_", bitIndex);
760 }
761
GenerateSetBitMutableLocal(int bitIndex)762 std::string GenerateSetBitMutableLocal(int bitIndex) {
763 return GenerateSetBitInternal("mutable_", bitIndex);
764 }
765
IsReferenceType(JavaType type)766 bool IsReferenceType(JavaType type) {
767 switch (type) {
768 case JAVATYPE_INT:
769 return false;
770 case JAVATYPE_LONG:
771 return false;
772 case JAVATYPE_FLOAT:
773 return false;
774 case JAVATYPE_DOUBLE:
775 return false;
776 case JAVATYPE_BOOLEAN:
777 return false;
778 case JAVATYPE_STRING:
779 return true;
780 case JAVATYPE_BYTES:
781 return true;
782 case JAVATYPE_ENUM:
783 return true;
784 case JAVATYPE_MESSAGE:
785 return true;
786
787 // No default because we want the compiler to complain if any new
788 // JavaTypes are added.
789 }
790
791 GOOGLE_LOG(FATAL) << "Can't get here.";
792 return false;
793 }
794
GetCapitalizedType(const FieldDescriptor * field,bool immutable)795 const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) {
796 switch (GetType(field)) {
797 case FieldDescriptor::TYPE_INT32:
798 return "Int32";
799 case FieldDescriptor::TYPE_UINT32:
800 return "UInt32";
801 case FieldDescriptor::TYPE_SINT32:
802 return "SInt32";
803 case FieldDescriptor::TYPE_FIXED32:
804 return "Fixed32";
805 case FieldDescriptor::TYPE_SFIXED32:
806 return "SFixed32";
807 case FieldDescriptor::TYPE_INT64:
808 return "Int64";
809 case FieldDescriptor::TYPE_UINT64:
810 return "UInt64";
811 case FieldDescriptor::TYPE_SINT64:
812 return "SInt64";
813 case FieldDescriptor::TYPE_FIXED64:
814 return "Fixed64";
815 case FieldDescriptor::TYPE_SFIXED64:
816 return "SFixed64";
817 case FieldDescriptor::TYPE_FLOAT:
818 return "Float";
819 case FieldDescriptor::TYPE_DOUBLE:
820 return "Double";
821 case FieldDescriptor::TYPE_BOOL:
822 return "Bool";
823 case FieldDescriptor::TYPE_STRING:
824 return "String";
825 case FieldDescriptor::TYPE_BYTES: {
826 return "Bytes";
827 }
828 case FieldDescriptor::TYPE_ENUM:
829 return "Enum";
830 case FieldDescriptor::TYPE_GROUP:
831 return "Group";
832 case FieldDescriptor::TYPE_MESSAGE:
833 return "Message";
834
835 // No default because we want the compiler to complain if any new
836 // types are added.
837 }
838
839 GOOGLE_LOG(FATAL) << "Can't get here.";
840 return NULL;
841 }
842
843 // For encodings with fixed sizes, returns that size in bytes. Otherwise
844 // returns -1.
FixedSize(FieldDescriptor::Type type)845 int FixedSize(FieldDescriptor::Type type) {
846 switch (type) {
847 case FieldDescriptor::TYPE_INT32:
848 return -1;
849 case FieldDescriptor::TYPE_INT64:
850 return -1;
851 case FieldDescriptor::TYPE_UINT32:
852 return -1;
853 case FieldDescriptor::TYPE_UINT64:
854 return -1;
855 case FieldDescriptor::TYPE_SINT32:
856 return -1;
857 case FieldDescriptor::TYPE_SINT64:
858 return -1;
859 case FieldDescriptor::TYPE_FIXED32:
860 return WireFormatLite::kFixed32Size;
861 case FieldDescriptor::TYPE_FIXED64:
862 return WireFormatLite::kFixed64Size;
863 case FieldDescriptor::TYPE_SFIXED32:
864 return WireFormatLite::kSFixed32Size;
865 case FieldDescriptor::TYPE_SFIXED64:
866 return WireFormatLite::kSFixed64Size;
867 case FieldDescriptor::TYPE_FLOAT:
868 return WireFormatLite::kFloatSize;
869 case FieldDescriptor::TYPE_DOUBLE:
870 return WireFormatLite::kDoubleSize;
871
872 case FieldDescriptor::TYPE_BOOL:
873 return WireFormatLite::kBoolSize;
874 case FieldDescriptor::TYPE_ENUM:
875 return -1;
876
877 case FieldDescriptor::TYPE_STRING:
878 return -1;
879 case FieldDescriptor::TYPE_BYTES:
880 return -1;
881 case FieldDescriptor::TYPE_GROUP:
882 return -1;
883 case FieldDescriptor::TYPE_MESSAGE:
884 return -1;
885
886 // No default because we want the compiler to complain if any new
887 // types are added.
888 }
889 GOOGLE_LOG(FATAL) << "Can't get here.";
890 return -1;
891 }
892
893 // Sort the fields of the given Descriptor by number into a new[]'d array
894 // and return it. The caller should delete the returned array.
SortFieldsByNumber(const Descriptor * descriptor)895 const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) {
896 const FieldDescriptor** fields =
897 new const FieldDescriptor*[descriptor->field_count()];
898 for (int i = 0; i < descriptor->field_count(); i++) {
899 fields[i] = descriptor->field(i);
900 }
901 std::sort(fields, fields + descriptor->field_count(),
902 FieldOrderingByNumber());
903 return fields;
904 }
905
906 // Returns true if the message type has any required fields. If it doesn't,
907 // we can optimize out calls to its isInitialized() method.
908 //
909 // already_seen is used to avoid checking the same type multiple times
910 // (and also to protect against recursion).
HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)911 bool HasRequiredFields(const Descriptor* type,
912 std::unordered_set<const Descriptor*>* already_seen) {
913 if (already_seen->count(type) > 0) {
914 // The type is already in cache. This means that either:
915 // a. The type has no required fields.
916 // b. We are in the midst of checking if the type has required fields,
917 // somewhere up the stack. In this case, we know that if the type
918 // has any required fields, they'll be found when we return to it,
919 // and the whole call to HasRequiredFields() will return true.
920 // Therefore, we don't have to check if this type has required fields
921 // here.
922 return false;
923 }
924 already_seen->insert(type);
925
926 // If the type has extensions, an extension with message type could contain
927 // required fields, so we have to be conservative and assume such an
928 // extension exists.
929 if (type->extension_range_count() > 0) return true;
930
931 for (int i = 0; i < type->field_count(); i++) {
932 const FieldDescriptor* field = type->field(i);
933 if (field->is_required()) {
934 return true;
935 }
936 if (GetJavaType(field) == JAVATYPE_MESSAGE) {
937 if (HasRequiredFields(field->message_type(), already_seen)) {
938 return true;
939 }
940 }
941 }
942
943 return false;
944 }
945
HasRequiredFields(const Descriptor * type)946 bool HasRequiredFields(const Descriptor* type) {
947 std::unordered_set<const Descriptor*> already_seen;
948 return HasRequiredFields(type, &already_seen);
949 }
950
HasRepeatedFields(const Descriptor * descriptor)951 bool HasRepeatedFields(const Descriptor* descriptor) {
952 for (int i = 0; i < descriptor->field_count(); ++i) {
953 const FieldDescriptor* field = descriptor->field(i);
954 if (field->is_repeated()) {
955 return true;
956 }
957 }
958 return false;
959 }
960
// Appends to |output| a UTF-16 encoding of the unsigned 32-bit |number|.
//
// The value is consumed 13 bits at a time, least-significant bits first:
//   * While the remaining value is above 0xD7FF, its lowest 13 bits are OR-ed
//     with 0xE000, producing a character in [0xE000, 0xFFFF], and the value
//     is shifted right by 13 bits.
//   * Once the remaining value lies in [0x0000, 0xD7FF] it is emitted as a
//     single character with that same numeric value, which ends the sequence.
//
// A value that already fits in [0x0000, 0xD7FF] therefore produces exactly
// one character. Code units in [0xD800, 0xDFFF] are never produced, so the
// output contains no surrogate pairs; surrogates would have to come in pairs
// and skipping them keeps the encoding more space-efficient.
void WriteUInt32ToUtf16CharSequence(uint32_t number,
                                    std::vector<uint16_t>* output) {
  // Continuation characters: each one carries 13 payload bits.
  for (; number >= 0xD800; number >>= 13) {
    const uint32_t payload = number & 0x1FFF;
    output->push_back(static_cast<uint16_t>(0xE000 | payload));
  }
  // Terminating character: what is left is now below 0xD800.
  output->push_back(static_cast<uint16_t>(number));
}
994
GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)995 int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) {
996 // j/c/g/protobuf/FieldType.java lists field types in a slightly different
997 // order from FieldDescriptor::Type so we can't do a simple cast.
998 //
999 // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order.
1000 int result = field->type();
1001 if (result == FieldDescriptor::TYPE_GROUP) {
1002 return 17;
1003 } else if (result < FieldDescriptor::TYPE_GROUP) {
1004 return result - 1;
1005 } else {
1006 return result - 2;
1007 }
1008 }
1009
GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)1010 int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) {
1011 if (field->type() == FieldDescriptor::TYPE_GROUP) {
1012 return 49;
1013 } else {
1014 return GetExperimentalJavaFieldTypeForSingular(field) + 18;
1015 }
1016 }
1017
GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)1018 int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) {
1019 int result = field->type();
1020 if (result < FieldDescriptor::TYPE_STRING) {
1021 return result + 34;
1022 } else if (result > FieldDescriptor::TYPE_BYTES) {
1023 return result + 30;
1024 } else {
1025 GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed.";
1026 return 0;
1027 }
1028 }
1029
GetExperimentalJavaFieldType(const FieldDescriptor * field)1030 int GetExperimentalJavaFieldType(const FieldDescriptor* field) {
1031 static const int kMapFieldType = 50;
1032 static const int kOneofFieldTypeOffset = 51;
1033 static const int kRequiredBit = 0x100;
1034 static const int kUtf8CheckBit = 0x200;
1035 static const int kCheckInitialized = 0x400;
1036 static const int kMapWithProto2EnumValue = 0x800;
1037 static const int kHasHasBit = 0x1000;
1038 int extra_bits = field->is_required() ? kRequiredBit : 0;
1039 if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) {
1040 extra_bits |= kUtf8CheckBit;
1041 }
1042 if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE &&
1043 HasRequiredFields(field->message_type()))) {
1044 extra_bits |= kCheckInitialized;
1045 }
1046 if (HasHasbit(field)) {
1047 extra_bits |= kHasHasBit;
1048 }
1049
1050 if (field->is_map()) {
1051 if (!SupportUnknownEnumValue(field)) {
1052 const FieldDescriptor* value =
1053 field->message_type()->FindFieldByName("value");
1054 if (GetJavaType(value) == JAVATYPE_ENUM) {
1055 extra_bits |= kMapWithProto2EnumValue;
1056 }
1057 }
1058 return kMapFieldType | extra_bits;
1059 } else if (field->is_packed()) {
1060 return GetExperimentalJavaFieldTypeForPacked(field);
1061 } else if (field->is_repeated()) {
1062 return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits;
1063 } else if (IsRealOneof(field)) {
1064 return (GetExperimentalJavaFieldTypeForSingular(field) +
1065 kOneofFieldTypeOffset) |
1066 extra_bits;
1067 } else {
1068 return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits;
1069 }
1070 }
1071
1072 // Escape a UTF-16 character to be embedded in a Java string.
EscapeUtf16ToString(uint16_t code,std::string * output)1073 void EscapeUtf16ToString(uint16_t code, std::string* output) {
1074 if (code == '\t') {
1075 output->append("\\t");
1076 } else if (code == '\b') {
1077 output->append("\\b");
1078 } else if (code == '\n') {
1079 output->append("\\n");
1080 } else if (code == '\r') {
1081 output->append("\\r");
1082 } else if (code == '\f') {
1083 output->append("\\f");
1084 } else if (code == '\'') {
1085 output->append("\\'");
1086 } else if (code == '\"') {
1087 output->append("\\\"");
1088 } else if (code == '\\') {
1089 output->append("\\\\");
1090 } else if (code >= 0x20 && code <= 0x7f) {
1091 output->push_back(static_cast<char>(code));
1092 } else {
1093 output->append(StringPrintf("\\u%04x", code));
1094 }
1095 }
1096
1097 } // namespace java
1098 } // namespace compiler
1099 } // namespace protobuf
1100 } // namespace google
1101