1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: robinson@google.com (Will Robinson)
32 //
33 // This module outputs pure-Python protocol message classes that will
34 // largely be constructed at runtime via the metaclass in reflection.py.
35 // In other words, our job is basically to output a Python equivalent
36 // of the C++ *Descriptor objects, and fix up all circular references
37 // within these objects.
38 //
39 // Note that the runtime performance of protocol message classes created in
40 // this way is expected to be lousy.  The plan is to create an alternate
41 // generator that outputs a Python/C extension module that lets
42 // performance-minded Python code leverage the fast C++ implementation
43 // directly.
44 
45 #include <google/protobuf/compiler/python/python_generator.h>
46 
47 #include <algorithm>
48 #include <limits>
49 #include <map>
50 #include <memory>
51 #include <string>
52 #include <utility>
53 #include <vector>
54 
55 #include <google/protobuf/stubs/logging.h>
56 #include <google/protobuf/stubs/common.h>
57 #include <google/protobuf/stubs/stringprintf.h>
58 #include <google/protobuf/descriptor.pb.h>
59 #include <google/protobuf/io/printer.h>
60 #include <google/protobuf/io/zero_copy_stream.h>
61 #include <google/protobuf/descriptor.h>
62 #include <google/protobuf/stubs/strutil.h>
63 #include <google/protobuf/stubs/substitute.h>
64 
65 namespace google {
66 namespace protobuf {
67 namespace compiler {
68 namespace python {
69 
70 namespace {
71 
72 
73 // Returns the Python module name expected for a given .proto filename.
ModuleName(const std::string & filename)74 std::string ModuleName(const std::string& filename) {
75   std::string basename = StripProto(filename);
76   ReplaceCharacters(&basename, "-", '_');
77   ReplaceCharacters(&basename, "/", '.');
78   return basename + "_pb2";
79 }
80 
81 // Returns the alias we assign to the module of the given .proto filename
82 // when importing. See testPackageInitializationImport in
83 // net/proto2/python/internal/reflection_test.py
84 // to see why we need the alias.
ModuleAlias(const std::string & filename)85 std::string ModuleAlias(const std::string& filename) {
86   std::string module_name = ModuleName(filename);
87   // We can't have dots in the module name, so we replace each with _dot_.
88   // But that could lead to a collision between a.b and a_dot_b, so we also
89   // duplicate each underscore.
90   GlobalReplaceSubstring("_", "__", &module_name);
91   GlobalReplaceSubstring(".", "_dot_", &module_name);
92   return module_name;
93 }
94 
// Words reserved by the Python language.  Names that collide with one of
// these cannot be emitted verbatim as identifiers in generated code.
const char* const kKeywords[] = {
    "False",    "None",   "True",     "and",    "as",      "assert", "async",
    "await",    "break",  "class",    "continue", "def",   "del",    "elif",
    "else",     "except", "finally",  "for",    "from",    "global", "if",
    "import",   "in",     "is",       "lambda", "nonlocal", "not",   "or",
    "pass",     "raise",  "return",   "try",    "while",   "with",   "yield",
    "print",
};
// One-past-the-end pointer of kKeywords, for use with iterator algorithms.
const char* const* kKeywordsEnd = std::end(kKeywords);
106 
ContainsPythonKeyword(const std::string & module_name)107 bool ContainsPythonKeyword(const std::string& module_name) {
108   std::vector<std::string> tokens = Split(module_name, ".");
109   for (int i = 0; i < tokens.size(); ++i) {
110     if (std::find(kKeywords, kKeywordsEnd, tokens[i]) != kKeywordsEnd) {
111       return true;
112     }
113   }
114   return false;
115 }
116 
IsPythonKeyword(const std::string & name)117 inline bool IsPythonKeyword(const std::string& name) {
118   return (std::find(kKeywords, kKeywordsEnd, name) != kKeywordsEnd);
119 }
120 
ResolveKeyword(const std::string & name)121 std::string ResolveKeyword(const std::string& name) {
122   if (IsPythonKeyword(name)) {
123     return "globals()['" + name + "']";
124   }
125   return name;
126 }
127 
128 // Returns the name of all containing types for descriptor,
129 // in order from outermost to innermost, followed by descriptor's
130 // own name.  Each name is separated by |separator|.
131 template <typename DescriptorT>
NamePrefixedWithNestedTypes(const DescriptorT & descriptor,const std::string & separator)132 std::string NamePrefixedWithNestedTypes(const DescriptorT& descriptor,
133                                         const std::string& separator) {
134   std::string name = descriptor.name();
135   const Descriptor* parent = descriptor.containing_type();
136   if (parent != nullptr) {
137     std::string prefix = NamePrefixedWithNestedTypes(*parent, separator);
138     if (separator == "." && IsPythonKeyword(name)) {
139       return "getattr(" + prefix + ", '" + name + "')";
140     } else {
141       return prefix + separator + name;
142     }
143   }
144   if (separator == ".") {
145     name = ResolveKeyword(name);
146   }
147   return name;
148 }
149 
// Name of the class attribute where we store the Python
// descriptor.Descriptor instance for the generated class.
// Must stay consistent with the _DESCRIPTOR_KEY constant
// in proto2/public/reflection.py.
//
// Within this file the same string is also used as the name of the
// module-level FileDescriptor variable and as the value of the `file=`
// argument passed to the descriptors that are printed.
const char kDescriptorKey[] = "DESCRIPTOR";
155 
156 // Does the file have top-level enums?
HasTopLevelEnums(const FileDescriptor * file)157 inline bool HasTopLevelEnums(const FileDescriptor* file) {
158   return file->enum_type_count() > 0;
159 }
160 
161 // Should we generate generic services for this file?
HasGenericServices(const FileDescriptor * file)162 inline bool HasGenericServices(const FileDescriptor* file) {
163   return file->service_count() > 0 && file->options().py_generic_services();
164 }
165 
166 // Prints the common boilerplate needed at the top of every .py
167 // file output by this generator.
PrintTopBoilerplate(io::Printer * printer,const FileDescriptor * file,bool descriptor_proto)168 void PrintTopBoilerplate(io::Printer* printer, const FileDescriptor* file,
169                          bool descriptor_proto) {
170   // TODO(robinson): Allow parameterization of Python version?
171   printer->Print(
172       "# -*- coding: utf-8 -*-\n"
173       "# Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
174       "# source: $filename$\n"
175       "\"\"\"Generated protocol buffer code.\"\"\"\n",
176       "filename", file->name());
177   if (HasTopLevelEnums(file)) {
178     printer->Print(
179         "from google.protobuf.internal import enum_type_wrapper\n");
180   }
181   printer->Print(
182       "from google.protobuf import descriptor as _descriptor\n"
183       "from google.protobuf import message as _message\n"
184       "from google.protobuf import reflection as _reflection\n"
185       "from google.protobuf import symbol_database as "
186       "_symbol_database\n");
187   if (HasGenericServices(file)) {
188     printer->Print(
189         "from google.protobuf import service as _service\n"
190         "from google.protobuf import service_reflection\n");
191   }
192 
193   printer->Print(
194       "# @@protoc_insertion_point(imports)\n\n"
195       "_sym_db = _symbol_database.Default()\n");
196   printer->Print("\n\n");
197 }
198 
199 // Returns a Python literal giving the default value for a field.
200 // If the field specifies no explicit default value, we'll return
201 // the default default value for the field type (zero for numbers,
202 // empty string for strings, empty list for repeated fields, and
203 // None for non-repeated, composite fields).
204 //
205 // TODO(robinson): Unify with code from
206 // //compiler/cpp/internal/primitive_field.cc
207 // //compiler/cpp/internal/enum_field.cc
208 // //compiler/cpp/internal/string_field.cc
StringifyDefaultValue(const FieldDescriptor & field)209 std::string StringifyDefaultValue(const FieldDescriptor& field) {
210   if (field.is_repeated()) {
211     return "[]";
212   }
213 
214   switch (field.cpp_type()) {
215     case FieldDescriptor::CPPTYPE_INT32:
216       return StrCat(field.default_value_int32());
217     case FieldDescriptor::CPPTYPE_UINT32:
218       return StrCat(field.default_value_uint32());
219     case FieldDescriptor::CPPTYPE_INT64:
220       return StrCat(field.default_value_int64());
221     case FieldDescriptor::CPPTYPE_UINT64:
222       return StrCat(field.default_value_uint64());
223     case FieldDescriptor::CPPTYPE_DOUBLE: {
224       double value = field.default_value_double();
225       if (value == std::numeric_limits<double>::infinity()) {
226         // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
227         // a numeric literal that is too big for a double will become infinity.
228         return "1e10000";
229       } else if (value == -std::numeric_limits<double>::infinity()) {
230         // See above.
231         return "-1e10000";
232       } else if (value != value) {
233         // infinity * 0 = nan
234         return "(1e10000 * 0)";
235       } else {
236         return "float(" + SimpleDtoa(value) + ")";
237       }
238     }
239     case FieldDescriptor::CPPTYPE_FLOAT: {
240       float value = field.default_value_float();
241       if (value == std::numeric_limits<float>::infinity()) {
242         // Python pre-2.6 on Windows does not parse "inf" correctly.  However,
243         // a numeric literal that is too big for a double will become infinity.
244         return "1e10000";
245       } else if (value == -std::numeric_limits<float>::infinity()) {
246         // See above.
247         return "-1e10000";
248       } else if (value != value) {
249         // infinity - infinity = nan
250         return "(1e10000 * 0)";
251       } else {
252         return "float(" + SimpleFtoa(value) + ")";
253       }
254     }
255     case FieldDescriptor::CPPTYPE_BOOL:
256       return field.default_value_bool() ? "True" : "False";
257     case FieldDescriptor::CPPTYPE_ENUM:
258       return StrCat(field.default_value_enum()->number());
259     case FieldDescriptor::CPPTYPE_STRING:
260       return "b\"" + CEscape(field.default_value_string()) +
261              (field.type() != FieldDescriptor::TYPE_STRING
262                   ? "\""
263                   : "\".decode('utf-8')");
264     case FieldDescriptor::CPPTYPE_MESSAGE:
265       return "None";
266   }
267   // (We could add a default case above but then we wouldn't get the nice
268   // compiler warning when a new type is added.)
269   GOOGLE_LOG(FATAL) << "Not reached.";
270   return "";
271 }
272 
StringifySyntax(FileDescriptor::Syntax syntax)273 std::string StringifySyntax(FileDescriptor::Syntax syntax) {
274   switch (syntax) {
275     case FileDescriptor::SYNTAX_PROTO2:
276       return "proto2";
277     case FileDescriptor::SYNTAX_PROTO3:
278       return "proto3";
279     case FileDescriptor::SYNTAX_UNKNOWN:
280     default:
281       GOOGLE_LOG(FATAL) << "Unsupported syntax; this generator only supports proto2 "
282                     "and proto3 syntax.";
283       return "";
284   }
285 }
286 
287 }  // namespace
288 
// Constructs a generator that is not yet bound to a file: file_ starts out
// null and is assigned by Generate().
Generator::Generator() : file_(nullptr) {}
290 
~Generator()291 Generator::~Generator() {}
292 
GetSupportedFeatures() const293 uint64_t Generator::GetSupportedFeatures() const {
294   return CodeGenerator::Feature::FEATURE_PROTO3_OPTIONAL;
295 }
296 
Generate(const FileDescriptor * file,const std::string & parameter,GeneratorContext * context,std::string * error) const297 bool Generator::Generate(const FileDescriptor* file,
298                          const std::string& parameter,
299                          GeneratorContext* context, std::string* error) const {
300   // -----------------------------------------------------------------
301   // parse generator options
302   bool cpp_generated_lib_linked = false;
303 
304   std::vector<std::pair<std::string, std::string> > options;
305   ParseGeneratorParameter(parameter, &options);
306 
307   for (int i = 0; i < options.size(); i++) {
308     if (options[i].first == "cpp_generated_lib_linked") {
309       cpp_generated_lib_linked = true;
310     } else {
311       *error = "Unknown generator option: " + options[i].first;
312       return false;
313     }
314   }
315 
316   // Completely serialize all Generate() calls on this instance.  The
317   // thread-safety constraints of the CodeGenerator interface aren't clear so
318   // just be as conservative as possible.  It's easier to relax this later if
319   // we need to, but I doubt it will be an issue.
320   // TODO(kenton):  The proper thing to do would be to allocate any state on
321   //   the stack and use that, so that the Generator class itself does not need
322   //   to have any mutable members.  Then it is implicitly thread-safe.
323   MutexLock lock(&mutex_);
324   file_ = file;
325   std::string module_name = ModuleName(file->name());
326   std::string filename = module_name;
327   ReplaceCharacters(&filename, ".", '/');
328   filename += ".py";
329 
330   pure_python_workable_ = !cpp_generated_lib_linked;
331   if (HasPrefixString(file->name(), "google/protobuf/")) {
332     pure_python_workable_ = true;
333   }
334 
335   FileDescriptorProto fdp;
336   file_->CopyTo(&fdp);
337   fdp.SerializeToString(&file_descriptor_serialized_);
338 
339 
340   std::unique_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
341   GOOGLE_CHECK(output.get());
342   io::Printer printer(output.get(), '$');
343   printer_ = &printer;
344 
345   PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto());
346   if (pure_python_workable_) {
347     PrintImports();
348   }
349   PrintFileDescriptor();
350   PrintTopLevelEnums();
351   PrintTopLevelExtensions();
352   if (pure_python_workable_) {
353     PrintAllNestedEnumsInFile();
354     PrintMessageDescriptors();
355     FixForeignFieldsInDescriptors();
356   }
357   PrintMessages();
358   if (pure_python_workable_) {
359     // We have to fix up the extensions after the message classes themselves,
360     // since they need to call static RegisterExtension() methods on these
361     // classes.
362     FixForeignFieldsInExtensions();
363     // Descriptor options may have custom extensions. These custom options
364     // can only be successfully parsed after we register corresponding
365     // extensions. Therefore we parse all options again here to recognize
366     // custom options that may be unknown when we define the descriptors.
367     // This does not apply to services because they are not used by extensions.
368     FixAllDescriptorOptions();
369     PrintServiceDescriptors();
370   }
371   if (HasGenericServices(file)) {
372     PrintServices();
373   }
374 
375   printer.Print("# @@protoc_insertion_point(module_scope)\n");
376 
377   return !printer.failed();
378 }
379 
380 
381 // Prints Python imports for all modules imported by |file|.
PrintImports() const382 void Generator::PrintImports() const {
383   for (int i = 0; i < file_->dependency_count(); ++i) {
384     const std::string& filename = file_->dependency(i)->name();
385 
386     std::string module_name = ModuleName(filename);
387     std::string module_alias = ModuleAlias(filename);
388     if (ContainsPythonKeyword(module_name)) {
389       // If the module path contains a Python keyword, we have to quote the
390       // module name and import it using importlib. Otherwise the usual kind of
391       // import statement would result in a syntax error from the presence of
392       // the keyword.
393       printer_->Print("import importlib\n");
394       printer_->Print("$alias$ = importlib.import_module('$name$')\n", "alias",
395                       module_alias, "name", module_name);
396     } else {
397       int last_dot_pos = module_name.rfind('.');
398       std::string import_statement;
399       if (last_dot_pos == std::string::npos) {
400         // NOTE(petya): this is not tested as it would require a protocol buffer
401         // outside of any package, and I don't think that is easily achievable.
402         import_statement = "import " + module_name;
403       } else {
404         import_statement = "from " + module_name.substr(0, last_dot_pos) +
405                            " import " + module_name.substr(last_dot_pos + 1);
406       }
407       printer_->Print("$statement$ as $alias$\n", "statement", import_statement,
408                       "alias", module_alias);
409     }
410 
411     CopyPublicDependenciesAliases(module_alias, file_->dependency(i));
412   }
413   printer_->Print("\n");
414 
415   // Print public imports.
416   for (int i = 0; i < file_->public_dependency_count(); ++i) {
417     std::string module_name = ModuleName(file_->public_dependency(i)->name());
418     printer_->Print("from $module$ import *\n", "module", module_name);
419   }
420   printer_->Print("\n");
421 }
422 
423 // Prints the single file descriptor for this file.
PrintFileDescriptor() const424 void Generator::PrintFileDescriptor() const {
425   std::map<std::string, std::string> m;
426   m["descriptor_name"] = kDescriptorKey;
427   m["name"] = file_->name();
428   m["package"] = file_->package();
429   m["syntax"] = StringifySyntax(file_->syntax());
430   m["options"] = OptionsValue(file_->options().SerializeAsString());
431   const char file_descriptor_template[] =
432       "$descriptor_name$ = _descriptor.FileDescriptor(\n"
433       "  name='$name$',\n"
434       "  package='$package$',\n"
435       "  syntax='$syntax$',\n"
436       "  serialized_options=$options$,\n"
437       "  create_key=_descriptor._internal_create_key,\n";
438   printer_->Print(m, file_descriptor_template);
439   printer_->Indent();
440   if (pure_python_workable_) {
441     printer_->Print("serialized_pb=b'$value$'\n", "value",
442                     strings::CHexEscape(file_descriptor_serialized_));
443     if (file_->dependency_count() != 0) {
444       printer_->Print(",\ndependencies=[");
445       for (int i = 0; i < file_->dependency_count(); ++i) {
446         std::string module_alias = ModuleAlias(file_->dependency(i)->name());
447         printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
448                         module_alias);
449       }
450       printer_->Print("]");
451     }
452     if (file_->public_dependency_count() > 0) {
453       printer_->Print(",\npublic_dependencies=[");
454       for (int i = 0; i < file_->public_dependency_count(); ++i) {
455         std::string module_alias =
456             ModuleAlias(file_->public_dependency(i)->name());
457         printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
458                         module_alias);
459       }
460       printer_->Print("]");
461     }
462   } else {
463     printer_->Print("serialized_pb=''\n");
464   }
465 
466   // TODO(falk): Also print options and fix the message_type, enum_type,
467   //             service and extension later in the generation.
468 
469   printer_->Outdent();
470   printer_->Print(")\n");
471   printer_->Print("\n");
472 }
473 
474 // Prints descriptors and module-level constants for all top-level
475 // enums defined in |file|.
PrintTopLevelEnums() const476 void Generator::PrintTopLevelEnums() const {
477   std::vector<std::pair<std::string, int> > top_level_enum_values;
478   for (int i = 0; i < file_->enum_type_count(); ++i) {
479     const EnumDescriptor& enum_descriptor = *file_->enum_type(i);
480     PrintEnum(enum_descriptor);
481     printer_->Print(
482         "$name$ = "
483         "enum_type_wrapper.EnumTypeWrapper($descriptor_name$)",
484         "name", ResolveKeyword(enum_descriptor.name()), "descriptor_name",
485         ModuleLevelDescriptorName(enum_descriptor));
486     printer_->Print("\n");
487 
488     for (int j = 0; j < enum_descriptor.value_count(); ++j) {
489       const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(j);
490       top_level_enum_values.push_back(
491           std::make_pair(value_descriptor.name(), value_descriptor.number()));
492     }
493   }
494 
495   for (int i = 0; i < top_level_enum_values.size(); ++i) {
496     printer_->Print("$name$ = $value$\n", "name",
497                     ResolveKeyword(top_level_enum_values[i].first), "value",
498                     StrCat(top_level_enum_values[i].second));
499   }
500   printer_->Print("\n");
501 }
502 
503 // Prints all enums contained in all message types in |file|.
PrintAllNestedEnumsInFile() const504 void Generator::PrintAllNestedEnumsInFile() const {
505   for (int i = 0; i < file_->message_type_count(); ++i) {
506     PrintNestedEnums(*file_->message_type(i));
507   }
508 }
509 
510 // Prints a Python statement assigning the appropriate module-level
511 // enum name to a Python EnumDescriptor object equivalent to
512 // enum_descriptor.
PrintEnum(const EnumDescriptor & enum_descriptor) const513 void Generator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
514   std::map<std::string, std::string> m;
515   std::string module_level_descriptor_name =
516       ModuleLevelDescriptorName(enum_descriptor);
517   m["descriptor_name"] = module_level_descriptor_name;
518   m["name"] = enum_descriptor.name();
519   m["full_name"] = enum_descriptor.full_name();
520   m["file"] = kDescriptorKey;
521   const char enum_descriptor_template[] =
522       "$descriptor_name$ = _descriptor.EnumDescriptor(\n"
523       "  name='$name$',\n"
524       "  full_name='$full_name$',\n"
525       "  filename=None,\n"
526       "  file=$file$,\n"
527       "  create_key=_descriptor._internal_create_key,\n"
528       "  values=[\n";
529   std::string options_string;
530   enum_descriptor.options().SerializeToString(&options_string);
531   printer_->Print(m, enum_descriptor_template);
532   printer_->Indent();
533   printer_->Indent();
534 
535   if (pure_python_workable_) {
536     for (int i = 0; i < enum_descriptor.value_count(); ++i) {
537       PrintEnumValueDescriptor(*enum_descriptor.value(i));
538       printer_->Print(",\n");
539     }
540   }
541 
542   printer_->Outdent();
543   printer_->Print("],\n");
544   printer_->Print("containing_type=None,\n");
545   printer_->Print("serialized_options=$options_value$,\n", "options_value",
546                   OptionsValue(options_string));
547   EnumDescriptorProto edp;
548   PrintSerializedPbInterval(enum_descriptor, edp);
549   printer_->Outdent();
550   printer_->Print(")\n");
551   if (pure_python_workable_) {
552     printer_->Print("_sym_db.RegisterEnumDescriptor($name$)\n", "name",
553                     module_level_descriptor_name);
554   }
555   printer_->Print("\n");
556 }
557 
558 // Recursively prints enums in nested types within descriptor, then
559 // prints enums contained at the top level in descriptor.
PrintNestedEnums(const Descriptor & descriptor) const560 void Generator::PrintNestedEnums(const Descriptor& descriptor) const {
561   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
562     PrintNestedEnums(*descriptor.nested_type(i));
563   }
564 
565   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
566     PrintEnum(*descriptor.enum_type(i));
567   }
568 }
569 
PrintTopLevelExtensions() const570 void Generator::PrintTopLevelExtensions() const {
571   const bool is_extension = true;
572   for (int i = 0; i < file_->extension_count(); ++i) {
573     const FieldDescriptor& extension_field = *file_->extension(i);
574     std::string constant_name = extension_field.name() + "_FIELD_NUMBER";
575     ToUpper(&constant_name);
576     printer_->Print("$constant_name$ = $number$\n", "constant_name",
577                     constant_name, "number",
578                     StrCat(extension_field.number()));
579     printer_->Print("$name$ = ", "name",
580                     ResolveKeyword(extension_field.name()));
581     PrintFieldDescriptor(extension_field, is_extension);
582     printer_->Print("\n");
583   }
584   printer_->Print("\n");
585 }
586 
587 // Prints Python equivalents of all Descriptors in |file|.
PrintMessageDescriptors() const588 void Generator::PrintMessageDescriptors() const {
589   for (int i = 0; i < file_->message_type_count(); ++i) {
590     PrintDescriptor(*file_->message_type(i));
591     printer_->Print("\n");
592   }
593 }
594 
PrintServiceDescriptors() const595 void Generator::PrintServiceDescriptors() const {
596   for (int i = 0; i < file_->service_count(); ++i) {
597     PrintServiceDescriptor(*file_->service(i));
598     AddServiceToFileDescriptor(*file_->service(i));
599     printer_->Print("\n");
600   }
601 }
602 
PrintServices() const603 void Generator::PrintServices() const {
604   for (int i = 0; i < file_->service_count(); ++i) {
605     PrintServiceClass(*file_->service(i));
606     PrintServiceStub(*file_->service(i));
607     printer_->Print("\n");
608   }
609 }
610 
PrintServiceDescriptor(const ServiceDescriptor & descriptor) const611 void Generator::PrintServiceDescriptor(
612     const ServiceDescriptor& descriptor) const {
613   printer_->Print("\n");
614   std::string service_name = ModuleLevelServiceDescriptorName(descriptor);
615   std::string options_string;
616   descriptor.options().SerializeToString(&options_string);
617 
618   printer_->Print("$service_name$ = _descriptor.ServiceDescriptor(\n",
619                   "service_name", service_name);
620   printer_->Indent();
621   std::map<std::string, std::string> m;
622   m["name"] = descriptor.name();
623   m["full_name"] = descriptor.full_name();
624   m["file"] = kDescriptorKey;
625   m["index"] = StrCat(descriptor.index());
626   m["options_value"] = OptionsValue(options_string);
627   const char required_function_arguments[] =
628       "name='$name$',\n"
629       "full_name='$full_name$',\n"
630       "file=$file$,\n"
631       "index=$index$,\n"
632       "serialized_options=$options_value$,\n"
633       "create_key=_descriptor._internal_create_key,\n";
634   printer_->Print(m, required_function_arguments);
635 
636   ServiceDescriptorProto sdp;
637   PrintSerializedPbInterval(descriptor, sdp);
638 
639   printer_->Print("methods=[\n");
640   for (int i = 0; i < descriptor.method_count(); ++i) {
641     const MethodDescriptor* method = descriptor.method(i);
642     method->options().SerializeToString(&options_string);
643 
644     m.clear();
645     m["name"] = method->name();
646     m["full_name"] = method->full_name();
647     m["index"] = StrCat(method->index());
648     m["serialized_options"] = CEscape(options_string);
649     m["input_type"] = ModuleLevelDescriptorName(*(method->input_type()));
650     m["output_type"] = ModuleLevelDescriptorName(*(method->output_type()));
651     m["options_value"] = OptionsValue(options_string);
652     printer_->Print("_descriptor.MethodDescriptor(\n");
653     printer_->Indent();
654     printer_->Print(m,
655                     "name='$name$',\n"
656                     "full_name='$full_name$',\n"
657                     "index=$index$,\n"
658                     "containing_service=None,\n"
659                     "input_type=$input_type$,\n"
660                     "output_type=$output_type$,\n"
661                     "serialized_options=$options_value$,\n"
662                     "create_key=_descriptor._internal_create_key,\n");
663     printer_->Outdent();
664     printer_->Print("),\n");
665   }
666 
667   printer_->Outdent();
668   printer_->Print("])\n");
669   printer_->Print("_sym_db.RegisterServiceDescriptor($name$)\n", "name",
670                   service_name);
671   printer_->Print("\n");
672 }
673 
PrintDescriptorKeyAndModuleName(const ServiceDescriptor & descriptor) const674 void Generator::PrintDescriptorKeyAndModuleName(
675     const ServiceDescriptor& descriptor) const {
676   std::string name = ModuleLevelServiceDescriptorName(descriptor);
677   if (!pure_python_workable_) {
678     name = "_descriptor.ServiceDescriptor(full_name='" +
679            descriptor.full_name() + "')";
680   }
681   printer_->Print("$descriptor_key$ = $descriptor_name$,\n", "descriptor_key",
682                   kDescriptorKey, "descriptor_name", name);
683   std::string module_name = ModuleName(file_->name());
684   printer_->Print("__module__ = '$module_name$'\n", "module_name", module_name);
685 }
686 
PrintServiceClass(const ServiceDescriptor & descriptor) const687 void Generator::PrintServiceClass(const ServiceDescriptor& descriptor) const {
688   // Print the service.
689   printer_->Print(
690       "$class_name$ = service_reflection.GeneratedServiceType("
691       "'$class_name$', (_service.Service,), dict(\n",
692       "class_name", descriptor.name());
693   printer_->Indent();
694   Generator::PrintDescriptorKeyAndModuleName(descriptor);
695   printer_->Print("))\n\n");
696   printer_->Outdent();
697 }
698 
PrintServiceStub(const ServiceDescriptor & descriptor) const699 void Generator::PrintServiceStub(const ServiceDescriptor& descriptor) const {
700   // Print the service stub.
701   printer_->Print(
702       "$class_name$_Stub = "
703       "service_reflection.GeneratedServiceStubType("
704       "'$class_name$_Stub', ($class_name$,), dict(\n",
705       "class_name", descriptor.name());
706   printer_->Indent();
707   Generator::PrintDescriptorKeyAndModuleName(descriptor);
708   printer_->Print("))\n\n");
709   printer_->Outdent();
710 }
711 
712 // Prints statement assigning ModuleLevelDescriptorName(message_descriptor)
713 // to a Python Descriptor object for message_descriptor.
714 //
715 // Mutually recursive with PrintNestedDescriptors().
PrintDescriptor(const Descriptor & message_descriptor) const716 void Generator::PrintDescriptor(const Descriptor& message_descriptor) const {
717   PrintNestedDescriptors(message_descriptor);
718 
719   printer_->Print("\n");
720   printer_->Print("$descriptor_name$ = _descriptor.Descriptor(\n",
721                   "descriptor_name",
722                   ModuleLevelDescriptorName(message_descriptor));
723   printer_->Indent();
724   std::map<std::string, std::string> m;
725   m["name"] = message_descriptor.name();
726   m["full_name"] = message_descriptor.full_name();
727   m["file"] = kDescriptorKey;
728   const char required_function_arguments[] =
729       "name='$name$',\n"
730       "full_name='$full_name$',\n"
731       "filename=None,\n"
732       "file=$file$,\n"
733       "containing_type=None,\n"
734       "create_key=_descriptor._internal_create_key,\n";
735   printer_->Print(m, required_function_arguments);
736   PrintFieldsInDescriptor(message_descriptor);
737   PrintExtensionsInDescriptor(message_descriptor);
738 
739   // Nested types
740   printer_->Print("nested_types=[");
741   for (int i = 0; i < message_descriptor.nested_type_count(); ++i) {
742     const std::string nested_name =
743         ModuleLevelDescriptorName(*message_descriptor.nested_type(i));
744     printer_->Print("$name$, ", "name", nested_name);
745   }
746   printer_->Print("],\n");
747 
748   // Enum types
749   printer_->Print("enum_types=[\n");
750   printer_->Indent();
751   for (int i = 0; i < message_descriptor.enum_type_count(); ++i) {
752     const std::string descriptor_name =
753         ModuleLevelDescriptorName(*message_descriptor.enum_type(i));
754     printer_->Print(descriptor_name.c_str());
755     printer_->Print(",\n");
756   }
757   printer_->Outdent();
758   printer_->Print("],\n");
759   std::string options_string;
760   message_descriptor.options().SerializeToString(&options_string);
761   printer_->Print(
762       "serialized_options=$options_value$,\n"
763       "is_extendable=$extendable$,\n"
764       "syntax='$syntax$'",
765       "options_value", OptionsValue(options_string), "extendable",
766       message_descriptor.extension_range_count() > 0 ? "True" : "False",
767       "syntax", StringifySyntax(message_descriptor.file()->syntax()));
768   printer_->Print(",\n");
769 
770   // Extension ranges
771   printer_->Print("extension_ranges=[");
772   for (int i = 0; i < message_descriptor.extension_range_count(); ++i) {
773     const Descriptor::ExtensionRange* range =
774         message_descriptor.extension_range(i);
775     printer_->Print("($start$, $end$), ", "start", StrCat(range->start),
776                     "end", StrCat(range->end));
777   }
778   printer_->Print("],\n");
779   printer_->Print("oneofs=[\n");
780   printer_->Indent();
781   for (int i = 0; i < message_descriptor.oneof_decl_count(); ++i) {
782     const OneofDescriptor* desc = message_descriptor.oneof_decl(i);
783     std::map<std::string, std::string> m;
784     m["name"] = desc->name();
785     m["full_name"] = desc->full_name();
786     m["index"] = StrCat(desc->index());
787     std::string options_string =
788         OptionsValue(desc->options().SerializeAsString());
789     if (options_string == "None") {
790       m["serialized_options"] = "";
791     } else {
792       m["serialized_options"] = ", serialized_options=" + options_string;
793     }
794     printer_->Print(m,
795                     "_descriptor.OneofDescriptor(\n"
796                     "  name='$name$', full_name='$full_name$',\n"
797                     "  index=$index$, containing_type=None,\n"
798                     "  create_key=_descriptor._internal_create_key,\n"
799                     "fields=[]$serialized_options$),\n");
800   }
801   printer_->Outdent();
802   printer_->Print("],\n");
803   // Serialization of proto
804   DescriptorProto edp;
805   PrintSerializedPbInterval(message_descriptor, edp);
806 
807   printer_->Outdent();
808   printer_->Print(")\n");
809 }
810 
811 // Prints Python Descriptor objects for all nested types contained in
812 // message_descriptor.
813 //
814 // Mutually recursive with PrintDescriptor().
PrintNestedDescriptors(const Descriptor & containing_descriptor) const815 void Generator::PrintNestedDescriptors(
816     const Descriptor& containing_descriptor) const {
817   for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
818     PrintDescriptor(*containing_descriptor.nested_type(i));
819   }
820 }
821 
822 // Prints all messages in |file|.
PrintMessages() const823 void Generator::PrintMessages() const {
824   for (int i = 0; i < file_->message_type_count(); ++i) {
825     std::vector<std::string> to_register;
826     PrintMessage(*file_->message_type(i), "", &to_register, false);
827     for (int j = 0; j < to_register.size(); ++j) {
828       printer_->Print("_sym_db.RegisterMessage($name$)\n", "name",
829                       ResolveKeyword(to_register[j]));
830     }
831     printer_->Print("\n");
832   }
833 }
834 
835 // Prints a Python class for the given message descriptor.  We defer to the
836 // metaclass to do almost all of the work of actually creating a useful class.
837 // The purpose of this function and its many helper functions above is merely
838 // to output a Python version of the descriptors, which the metaclass in
839 // reflection.py will use to construct the meat of the class itself.
840 //
841 // Mutually recursive with PrintNestedMessages().
842 // Collect nested message names to_register for the symbol_database.
PrintMessage(const Descriptor & message_descriptor,const std::string & prefix,std::vector<std::string> * to_register,bool is_nested) const843 void Generator::PrintMessage(const Descriptor& message_descriptor,
844                              const std::string& prefix,
845                              std::vector<std::string>* to_register,
846                              bool is_nested) const {
847   std::string qualified_name;
848   if (is_nested) {
849     if (IsPythonKeyword(message_descriptor.name())) {
850       qualified_name =
851           "getattr(" + prefix + ", '" + message_descriptor.name() + "')";
852     } else {
853       qualified_name = prefix + "." + message_descriptor.name();
854     }
855     printer_->Print(
856         "'$name$' : _reflection.GeneratedProtocolMessageType('$name$', "
857         "(_message.Message,), {\n",
858         "name", message_descriptor.name());
859   } else {
860     qualified_name = ResolveKeyword(message_descriptor.name());
861     printer_->Print(
862         "$qualified_name$ = _reflection.GeneratedProtocolMessageType('$name$', "
863         "(_message.Message,), {\n",
864         "qualified_name", qualified_name, "name", message_descriptor.name());
865   }
866   printer_->Indent();
867 
868   to_register->push_back(qualified_name);
869 
870   PrintNestedMessages(message_descriptor, qualified_name, to_register);
871   std::map<std::string, std::string> m;
872   m["descriptor_key"] = kDescriptorKey;
873   if (pure_python_workable_) {
874     m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
875   } else {
876     m["descriptor_name"] = "_descriptor.Descriptor(full_name='" +
877                            message_descriptor.full_name() + "')";
878   }
879   printer_->Print(m, "'$descriptor_key$' : $descriptor_name$,\n");
880   std::string module_name = ModuleName(file_->name());
881   printer_->Print("'__module__' : '$module_name$'\n", "module_name",
882                   module_name);
883   printer_->Print("# @@protoc_insertion_point(class_scope:$full_name$)\n",
884                   "full_name", message_descriptor.full_name());
885   printer_->Print("})\n");
886   printer_->Outdent();
887 }
888 
889 // Prints all nested messages within |containing_descriptor|.
890 // Mutually recursive with PrintMessage().
PrintNestedMessages(const Descriptor & containing_descriptor,const std::string & prefix,std::vector<std::string> * to_register) const891 void Generator::PrintNestedMessages(
892     const Descriptor& containing_descriptor, const std::string& prefix,
893     std::vector<std::string>* to_register) const {
894   for (int i = 0; i < containing_descriptor.nested_type_count(); ++i) {
895     printer_->Print("\n");
896     PrintMessage(*containing_descriptor.nested_type(i), prefix, to_register,
897                  true);
898     printer_->Print(",\n");
899   }
900 }
901 
902 // Recursively fixes foreign fields in all nested types in |descriptor|, then
903 // sets the message_type and enum_type of all message and enum fields to point
904 // to their respective descriptors.
905 // Args:
906 //   descriptor: descriptor to print fields for.
907 //   containing_descriptor: if descriptor is a nested type, this is its
908 //       containing type, or NULL if this is a root/top-level type.
FixForeignFieldsInDescriptor(const Descriptor & descriptor,const Descriptor * containing_descriptor) const909 void Generator::FixForeignFieldsInDescriptor(
910     const Descriptor& descriptor,
911     const Descriptor* containing_descriptor) const {
912   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
913     FixForeignFieldsInDescriptor(*descriptor.nested_type(i), &descriptor);
914   }
915 
916   for (int i = 0; i < descriptor.field_count(); ++i) {
917     const FieldDescriptor& field_descriptor = *descriptor.field(i);
918     FixForeignFieldsInField(&descriptor, field_descriptor, "fields_by_name");
919   }
920 
921   FixContainingTypeInDescriptor(descriptor, containing_descriptor);
922   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
923     const EnumDescriptor& enum_descriptor = *descriptor.enum_type(i);
924     FixContainingTypeInDescriptor(enum_descriptor, &descriptor);
925   }
926   for (int i = 0; i < descriptor.oneof_decl_count(); ++i) {
927     std::map<std::string, std::string> m;
928     const OneofDescriptor* oneof = descriptor.oneof_decl(i);
929     m["descriptor_name"] = ModuleLevelDescriptorName(descriptor);
930     m["oneof_name"] = oneof->name();
931     for (int j = 0; j < oneof->field_count(); ++j) {
932       m["field_name"] = oneof->field(j)->name();
933       printer_->Print(
934           m,
935           "$descriptor_name$.oneofs_by_name['$oneof_name$'].fields.append(\n"
936           "  $descriptor_name$.fields_by_name['$field_name$'])\n");
937       printer_->Print(
938           m,
939           "$descriptor_name$.fields_by_name['$field_name$'].containing_oneof = "
940           "$descriptor_name$.oneofs_by_name['$oneof_name$']\n");
941     }
942   }
943 }
944 
AddMessageToFileDescriptor(const Descriptor & descriptor) const945 void Generator::AddMessageToFileDescriptor(const Descriptor& descriptor) const {
946   std::map<std::string, std::string> m;
947   m["descriptor_name"] = kDescriptorKey;
948   m["message_name"] = descriptor.name();
949   m["message_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
950   const char file_descriptor_template[] =
951       "$descriptor_name$.message_types_by_name['$message_name$'] = "
952       "$message_descriptor_name$\n";
953   printer_->Print(m, file_descriptor_template);
954 }
955 
AddServiceToFileDescriptor(const ServiceDescriptor & descriptor) const956 void Generator::AddServiceToFileDescriptor(
957     const ServiceDescriptor& descriptor) const {
958   std::map<std::string, std::string> m;
959   m["descriptor_name"] = kDescriptorKey;
960   m["service_name"] = descriptor.name();
961   m["service_descriptor_name"] = ModuleLevelServiceDescriptorName(descriptor);
962   const char file_descriptor_template[] =
963       "$descriptor_name$.services_by_name['$service_name$'] = "
964       "$service_descriptor_name$\n";
965   printer_->Print(m, file_descriptor_template);
966 }
967 
AddEnumToFileDescriptor(const EnumDescriptor & descriptor) const968 void Generator::AddEnumToFileDescriptor(
969     const EnumDescriptor& descriptor) const {
970   std::map<std::string, std::string> m;
971   m["descriptor_name"] = kDescriptorKey;
972   m["enum_name"] = descriptor.name();
973   m["enum_descriptor_name"] = ModuleLevelDescriptorName(descriptor);
974   const char file_descriptor_template[] =
975       "$descriptor_name$.enum_types_by_name['$enum_name$'] = "
976       "$enum_descriptor_name$\n";
977   printer_->Print(m, file_descriptor_template);
978 }
979 
AddExtensionToFileDescriptor(const FieldDescriptor & descriptor) const980 void Generator::AddExtensionToFileDescriptor(
981     const FieldDescriptor& descriptor) const {
982   std::map<std::string, std::string> m;
983   m["descriptor_name"] = kDescriptorKey;
984   m["field_name"] = descriptor.name();
985   m["resolved_name"] = ResolveKeyword(descriptor.name());
986   const char file_descriptor_template[] =
987       "$descriptor_name$.extensions_by_name['$field_name$'] = "
988       "$resolved_name$\n";
989   printer_->Print(m, file_descriptor_template);
990 }
991 
992 // Sets any necessary message_type and enum_type attributes
993 // for the Python version of |field|.
994 //
995 // containing_type may be NULL, in which case this is a module-level field.
996 //
997 // python_dict_name is the name of the Python dict where we should
998 // look the field up in the containing type.  (e.g., fields_by_name
999 // or extensions_by_name).  We ignore python_dict_name if containing_type
1000 // is NULL.
FixForeignFieldsInField(const Descriptor * containing_type,const FieldDescriptor & field,const std::string & python_dict_name) const1001 void Generator::FixForeignFieldsInField(
1002     const Descriptor* containing_type, const FieldDescriptor& field,
1003     const std::string& python_dict_name) const {
1004   const std::string field_referencing_expression =
1005       FieldReferencingExpression(containing_type, field, python_dict_name);
1006   std::map<std::string, std::string> m;
1007   m["field_ref"] = field_referencing_expression;
1008   const Descriptor* foreign_message_type = field.message_type();
1009   if (foreign_message_type) {
1010     m["foreign_type"] = ModuleLevelDescriptorName(*foreign_message_type);
1011     printer_->Print(m, "$field_ref$.message_type = $foreign_type$\n");
1012   }
1013   const EnumDescriptor* enum_type = field.enum_type();
1014   if (enum_type) {
1015     m["enum_type"] = ModuleLevelDescriptorName(*enum_type);
1016     printer_->Print(m, "$field_ref$.enum_type = $enum_type$\n");
1017   }
1018 }
1019 
1020 // Returns the module-level expression for the given FieldDescriptor.
1021 // Only works for fields in the .proto file this Generator is generating for.
1022 //
1023 // containing_type may be NULL, in which case this is a module-level field.
1024 //
1025 // python_dict_name is the name of the Python dict where we should
1026 // look the field up in the containing type.  (e.g., fields_by_name
1027 // or extensions_by_name).  We ignore python_dict_name if containing_type
1028 // is NULL.
FieldReferencingExpression(const Descriptor * containing_type,const FieldDescriptor & field,const std::string & python_dict_name) const1029 std::string Generator::FieldReferencingExpression(
1030     const Descriptor* containing_type, const FieldDescriptor& field,
1031     const std::string& python_dict_name) const {
1032   // We should only ever be looking up fields in the current file.
1033   // The only things we refer to from other files are message descriptors.
1034   GOOGLE_CHECK_EQ(field.file(), file_)
1035       << field.file()->name() << " vs. " << file_->name();
1036   if (!containing_type) {
1037     return ResolveKeyword(field.name());
1038   }
1039   return strings::Substitute("$0.$1['$2']",
1040                           ModuleLevelDescriptorName(*containing_type),
1041                           python_dict_name, field.name());
1042 }
1043 
1044 // Prints containing_type for nested descriptors or enum descriptors.
1045 template <typename DescriptorT>
FixContainingTypeInDescriptor(const DescriptorT & descriptor,const Descriptor * containing_descriptor) const1046 void Generator::FixContainingTypeInDescriptor(
1047     const DescriptorT& descriptor,
1048     const Descriptor* containing_descriptor) const {
1049   if (containing_descriptor != nullptr) {
1050     const std::string nested_name = ModuleLevelDescriptorName(descriptor);
1051     const std::string parent_name =
1052         ModuleLevelDescriptorName(*containing_descriptor);
1053     printer_->Print("$nested_name$.containing_type = $parent_name$\n",
1054                     "nested_name", nested_name, "parent_name", parent_name);
1055   }
1056 }
1057 
1058 // Prints statements setting the message_type and enum_type fields in the
1059 // Python descriptor objects we've already output in the file.  We must
1060 // do this in a separate step due to circular references (otherwise, we'd
1061 // just set everything in the initial assignment statements).
FixForeignFieldsInDescriptors() const1062 void Generator::FixForeignFieldsInDescriptors() const {
1063   for (int i = 0; i < file_->message_type_count(); ++i) {
1064     FixForeignFieldsInDescriptor(*file_->message_type(i), nullptr);
1065   }
1066   for (int i = 0; i < file_->message_type_count(); ++i) {
1067     AddMessageToFileDescriptor(*file_->message_type(i));
1068   }
1069   for (int i = 0; i < file_->enum_type_count(); ++i) {
1070     AddEnumToFileDescriptor(*file_->enum_type(i));
1071   }
1072   for (int i = 0; i < file_->extension_count(); ++i) {
1073     AddExtensionToFileDescriptor(*file_->extension(i));
1074   }
1075 
1076   // TODO(jieluo): Move this register to PrintFileDescriptor() when
1077   // FieldDescriptor.file is added in generated file.
1078   printer_->Print("_sym_db.RegisterFileDescriptor($name$)\n", "name",
1079                   kDescriptorKey);
1080   printer_->Print("\n");
1081 }
1082 
1083 // We need to not only set any necessary message_type fields, but
1084 // also need to call RegisterExtension() on each message we're
1085 // extending.
FixForeignFieldsInExtensions() const1086 void Generator::FixForeignFieldsInExtensions() const {
1087   // Top-level extensions.
1088   for (int i = 0; i < file_->extension_count(); ++i) {
1089     FixForeignFieldsInExtension(*file_->extension(i));
1090   }
1091   // Nested extensions.
1092   for (int i = 0; i < file_->message_type_count(); ++i) {
1093     FixForeignFieldsInNestedExtensions(*file_->message_type(i));
1094   }
1095   printer_->Print("\n");
1096 }
1097 
FixForeignFieldsInExtension(const FieldDescriptor & extension_field) const1098 void Generator::FixForeignFieldsInExtension(
1099     const FieldDescriptor& extension_field) const {
1100   GOOGLE_CHECK(extension_field.is_extension());
1101   // extension_scope() will be NULL for top-level extensions, which is
1102   // exactly what FixForeignFieldsInField() wants.
1103   FixForeignFieldsInField(extension_field.extension_scope(), extension_field,
1104                           "extensions_by_name");
1105 
1106   std::map<std::string, std::string> m;
1107   // Confusingly, for FieldDescriptors that happen to be extensions,
1108   // containing_type() means "extended type."
1109   // On the other hand, extension_scope() will give us what we normally
1110   // mean by containing_type().
1111   m["extended_message_class"] =
1112       ModuleLevelMessageName(*extension_field.containing_type());
1113   m["field"] = FieldReferencingExpression(
1114       extension_field.extension_scope(), extension_field, "extensions_by_name");
1115   printer_->Print(m, "$extended_message_class$.RegisterExtension($field$)\n");
1116 }
1117 
FixForeignFieldsInNestedExtensions(const Descriptor & descriptor) const1118 void Generator::FixForeignFieldsInNestedExtensions(
1119     const Descriptor& descriptor) const {
1120   // Recursively fix up extensions in all nested types.
1121   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1122     FixForeignFieldsInNestedExtensions(*descriptor.nested_type(i));
1123   }
1124   // Fix up extensions directly contained within this type.
1125   for (int i = 0; i < descriptor.extension_count(); ++i) {
1126     FixForeignFieldsInExtension(*descriptor.extension(i));
1127   }
1128 }
1129 
1130 // Returns a Python expression that instantiates a Python EnumValueDescriptor
1131 // object for the given C++ descriptor.
PrintEnumValueDescriptor(const EnumValueDescriptor & descriptor) const1132 void Generator::PrintEnumValueDescriptor(
1133     const EnumValueDescriptor& descriptor) const {
1134   // TODO(robinson): Fix up EnumValueDescriptor "type" fields.
1135   // More circular references.  ::sigh::
1136   std::string options_string;
1137   descriptor.options().SerializeToString(&options_string);
1138   std::map<std::string, std::string> m;
1139   m["name"] = descriptor.name();
1140   m["index"] = StrCat(descriptor.index());
1141   m["number"] = StrCat(descriptor.number());
1142   m["options"] = OptionsValue(options_string);
1143   printer_->Print(m,
1144                   "_descriptor.EnumValueDescriptor(\n"
1145                   "  name='$name$', index=$index$, number=$number$,\n"
1146                   "  serialized_options=$options$,\n"
1147                   "  type=None,\n"
1148                   "  create_key=_descriptor._internal_create_key)");
1149 }
1150 
1151 // Returns a CEscaped string of serialized_options.
OptionsValue(const std::string & serialized_options) const1152 std::string Generator::OptionsValue(
1153     const std::string& serialized_options) const {
1154   if (serialized_options.length() == 0 || GeneratingDescriptorProto()) {
1155     return "None";
1156   } else {
1157     return "b'" + CEscape(serialized_options) + "'";
1158   }
1159 }
1160 
1161 // Prints an expression for a Python FieldDescriptor for |field|.
PrintFieldDescriptor(const FieldDescriptor & field,bool is_extension) const1162 void Generator::PrintFieldDescriptor(const FieldDescriptor& field,
1163                                      bool is_extension) const {
1164   std::string options_string;
1165   field.options().SerializeToString(&options_string);
1166   std::map<std::string, std::string> m;
1167   m["name"] = field.name();
1168   m["full_name"] = field.full_name();
1169   m["index"] = StrCat(field.index());
1170   m["number"] = StrCat(field.number());
1171   m["type"] = StrCat(field.type());
1172   m["cpp_type"] = StrCat(field.cpp_type());
1173   m["label"] = StrCat(field.label());
1174   m["has_default_value"] = field.has_default_value() ? "True" : "False";
1175   m["default_value"] = StringifyDefaultValue(field);
1176   m["is_extension"] = is_extension ? "True" : "False";
1177   m["serialized_options"] = OptionsValue(options_string);
1178   m["json_name"] =
1179       field.has_json_name() ? ", json_name='" + field.json_name() + "'" : "";
1180   // We always set message_type and enum_type to None at this point, and then
1181   // these fields in correctly after all referenced descriptors have been
1182   // defined and/or imported (see FixForeignFieldsInDescriptors()).
1183   const char field_descriptor_decl[] =
1184       "_descriptor.FieldDescriptor(\n"
1185       "  name='$name$', full_name='$full_name$', index=$index$,\n"
1186       "  number=$number$, type=$type$, cpp_type=$cpp_type$, label=$label$,\n"
1187       "  has_default_value=$has_default_value$, "
1188       "default_value=$default_value$,\n"
1189       "  message_type=None, enum_type=None, containing_type=None,\n"
1190       "  is_extension=$is_extension$, extension_scope=None,\n"
1191       "  serialized_options=$serialized_options$$json_name$, file=DESCRIPTOR,"
1192       "  create_key=_descriptor._internal_create_key)";
1193   printer_->Print(m, field_descriptor_decl);
1194 }
1195 
1196 // Helper for Print{Fields,Extensions}InDescriptor().
PrintFieldDescriptorsInDescriptor(const Descriptor & message_descriptor,bool is_extension,const std::string & list_variable_name,int (Descriptor::* CountFn)()const,const FieldDescriptor * (Descriptor::* GetterFn)(int)const) const1197 void Generator::PrintFieldDescriptorsInDescriptor(
1198     const Descriptor& message_descriptor, bool is_extension,
1199     const std::string& list_variable_name, int (Descriptor::*CountFn)() const,
1200     const FieldDescriptor* (Descriptor::*GetterFn)(int)const) const {
1201   printer_->Print("$list$=[\n", "list", list_variable_name);
1202   printer_->Indent();
1203   for (int i = 0; i < (message_descriptor.*CountFn)(); ++i) {
1204     PrintFieldDescriptor(*(message_descriptor.*GetterFn)(i), is_extension);
1205     printer_->Print(",\n");
1206   }
1207   printer_->Outdent();
1208   printer_->Print("],\n");
1209 }
1210 
1211 // Prints a statement assigning "fields" to a list of Python FieldDescriptors,
1212 // one for each field present in message_descriptor.
PrintFieldsInDescriptor(const Descriptor & message_descriptor) const1213 void Generator::PrintFieldsInDescriptor(
1214     const Descriptor& message_descriptor) const {
1215   const bool is_extension = false;
1216   PrintFieldDescriptorsInDescriptor(message_descriptor, is_extension, "fields",
1217                                     &Descriptor::field_count,
1218                                     &Descriptor::field);
1219 }
1220 
1221 // Prints a statement assigning "extensions" to a list of Python
1222 // FieldDescriptors, one for each extension present in message_descriptor.
PrintExtensionsInDescriptor(const Descriptor & message_descriptor) const1223 void Generator::PrintExtensionsInDescriptor(
1224     const Descriptor& message_descriptor) const {
1225   const bool is_extension = true;
1226   PrintFieldDescriptorsInDescriptor(message_descriptor, is_extension,
1227                                     "extensions", &Descriptor::extension_count,
1228                                     &Descriptor::extension);
1229 }
1230 
GeneratingDescriptorProto() const1231 bool Generator::GeneratingDescriptorProto() const {
1232   return file_->name() == "net/proto2/proto/descriptor.proto" ||
1233          file_->name() == "google/protobuf/descriptor.proto";
1234 }
1235 
1236 // Returns the unique Python module-level identifier given to a descriptor.
1237 // This name is module-qualified iff the given descriptor describes an
1238 // entity that doesn't come from the current file.
1239 template <typename DescriptorT>
ModuleLevelDescriptorName(const DescriptorT & descriptor) const1240 std::string Generator::ModuleLevelDescriptorName(
1241     const DescriptorT& descriptor) const {
1242   // FIXME(robinson):
1243   // We currently don't worry about collisions with underscores in the type
1244   // names, so these would collide in nasty ways if found in the same file:
1245   //   OuterProto.ProtoA.ProtoB
1246   //   OuterProto_ProtoA.ProtoB  # Underscore instead of period.
1247   // As would these:
1248   //   OuterProto.ProtoA_.ProtoB
1249   //   OuterProto.ProtoA._ProtoB  # Leading vs. trailing underscore.
1250   // (Contrived, but certainly possible).
1251   //
1252   // The C++ implementation doesn't guard against this either.  Leaving
1253   // it for now...
1254   std::string name = NamePrefixedWithNestedTypes(descriptor, "_");
1255   ToUpper(&name);
1256   // Module-private for now.  Easy to make public later; almost impossible
1257   // to make private later.
1258   name = "_" + name;
1259   // We now have the name relative to its own module.  Also qualify with
1260   // the module name iff this descriptor is from a different .proto file.
1261   if (descriptor.file() != file_) {
1262     name = ModuleAlias(descriptor.file()->name()) + "." + name;
1263   }
1264   return name;
1265 }
1266 
1267 // Returns the name of the message class itself, not the descriptor.
1268 // Like ModuleLevelDescriptorName(), module-qualifies the name iff
1269 // the given descriptor describes an entity that doesn't come from
1270 // the current file.
ModuleLevelMessageName(const Descriptor & descriptor) const1271 std::string Generator::ModuleLevelMessageName(
1272     const Descriptor& descriptor) const {
1273   std::string name = NamePrefixedWithNestedTypes(descriptor, ".");
1274   if (descriptor.file() != file_) {
1275     name = ModuleAlias(descriptor.file()->name()) + "." + name;
1276   }
1277   return name;
1278 }
1279 
1280 // Returns the unique Python module-level identifier given to a service
1281 // descriptor.
ModuleLevelServiceDescriptorName(const ServiceDescriptor & descriptor) const1282 std::string Generator::ModuleLevelServiceDescriptorName(
1283     const ServiceDescriptor& descriptor) const {
1284   std::string name = descriptor.name();
1285   ToUpper(&name);
1286   name = "_" + name;
1287   if (descriptor.file() != file_) {
1288     name = ModuleAlias(descriptor.file()->name()) + "." + name;
1289   }
1290   return name;
1291 }
1292 
1293 // Prints standard constructor arguments serialized_start and serialized_end.
1294 // Args:
1295 //   descriptor: The cpp descriptor to have a serialized reference.
1296 //   proto: A proto
1297 // Example printer output:
1298 // serialized_start=41,
1299 // serialized_end=43,
1300 //
1301 template <typename DescriptorT, typename DescriptorProtoT>
PrintSerializedPbInterval(const DescriptorT & descriptor,DescriptorProtoT & proto) const1302 void Generator::PrintSerializedPbInterval(const DescriptorT& descriptor,
1303                                           DescriptorProtoT& proto) const {
1304   descriptor.CopyTo(&proto);
1305   std::string sp;
1306   proto.SerializeToString(&sp);
1307   int offset = file_descriptor_serialized_.find(sp);
1308   GOOGLE_CHECK_GE(offset, 0);
1309 
1310   printer_->Print(
1311       "serialized_start=$serialized_start$,\n"
1312       "serialized_end=$serialized_end$,\n",
1313       "serialized_start", StrCat(offset), "serialized_end",
1314       StrCat(offset + sp.size()));
1315 }
1316 
1317 namespace {
PrintDescriptorOptionsFixingCode(const std::string & descriptor,const std::string & options,io::Printer * printer)1318 void PrintDescriptorOptionsFixingCode(const std::string& descriptor,
1319                                       const std::string& options,
1320                                       io::Printer* printer) {
1321   // Reset the _options to None thus DescriptorBase.GetOptions() can
1322   // parse _options again after extensions are registered.
1323   printer->Print("$descriptor$._options = None\n", "descriptor", descriptor);
1324 }
1325 }  // namespace
1326 
1327 // Prints expressions that set the options field of all descriptors.
FixAllDescriptorOptions() const1328 void Generator::FixAllDescriptorOptions() const {
1329   // Prints an expression that sets the file descriptor's options.
1330   std::string file_options = OptionsValue(file_->options().SerializeAsString());
1331   if (file_options != "None") {
1332     PrintDescriptorOptionsFixingCode(kDescriptorKey, file_options, printer_);
1333   }
1334   // Prints expressions that set the options for all top level enums.
1335   for (int i = 0; i < file_->enum_type_count(); ++i) {
1336     const EnumDescriptor& enum_descriptor = *file_->enum_type(i);
1337     FixOptionsForEnum(enum_descriptor);
1338   }
1339   // Prints expressions that set the options for all top level extensions.
1340   for (int i = 0; i < file_->extension_count(); ++i) {
1341     const FieldDescriptor& field = *file_->extension(i);
1342     FixOptionsForField(field);
1343   }
1344   // Prints expressions that set the options for all messages, nested enums,
1345   // nested extensions and message fields.
1346   for (int i = 0; i < file_->message_type_count(); ++i) {
1347     FixOptionsForMessage(*file_->message_type(i));
1348   }
1349 }
1350 
FixOptionsForOneof(const OneofDescriptor & oneof) const1351 void Generator::FixOptionsForOneof(const OneofDescriptor& oneof) const {
1352   std::string oneof_options = OptionsValue(oneof.options().SerializeAsString());
1353   if (oneof_options != "None") {
1354     std::string oneof_name = strings::Substitute(
1355         "$0.$1['$2']", ModuleLevelDescriptorName(*oneof.containing_type()),
1356         "oneofs_by_name", oneof.name());
1357     PrintDescriptorOptionsFixingCode(oneof_name, oneof_options, printer_);
1358   }
1359 }
1360 
1361 // Prints expressions that set the options for an enum descriptor and its
1362 // value descriptors.
FixOptionsForEnum(const EnumDescriptor & enum_descriptor) const1363 void Generator::FixOptionsForEnum(const EnumDescriptor& enum_descriptor) const {
1364   std::string descriptor_name = ModuleLevelDescriptorName(enum_descriptor);
1365   std::string enum_options =
1366       OptionsValue(enum_descriptor.options().SerializeAsString());
1367   if (enum_options != "None") {
1368     PrintDescriptorOptionsFixingCode(descriptor_name, enum_options, printer_);
1369   }
1370   for (int i = 0; i < enum_descriptor.value_count(); ++i) {
1371     const EnumValueDescriptor& value_descriptor = *enum_descriptor.value(i);
1372     std::string value_options =
1373         OptionsValue(value_descriptor.options().SerializeAsString());
1374     if (value_options != "None") {
1375       PrintDescriptorOptionsFixingCode(
1376           StringPrintf("%s.values_by_name[\"%s\"]", descriptor_name.c_str(),
1377                        value_descriptor.name().c_str()),
1378           value_options, printer_);
1379     }
1380   }
1381 }
1382 
1383 // Prints expressions that set the options for field descriptors (including
1384 // extensions).
FixOptionsForField(const FieldDescriptor & field) const1385 void Generator::FixOptionsForField(const FieldDescriptor& field) const {
1386   std::string field_options = OptionsValue(field.options().SerializeAsString());
1387   if (field_options != "None") {
1388     std::string field_name;
1389     if (field.is_extension()) {
1390       if (field.extension_scope() == nullptr) {
1391         // Top level extensions.
1392         field_name = field.name();
1393       } else {
1394         field_name = FieldReferencingExpression(field.extension_scope(), field,
1395                                                 "extensions_by_name");
1396       }
1397     } else {
1398       field_name = FieldReferencingExpression(field.containing_type(), field,
1399                                               "fields_by_name");
1400     }
1401     PrintDescriptorOptionsFixingCode(field_name, field_options, printer_);
1402   }
1403 }
1404 
1405 // Prints expressions that set the options for a message and all its inner
1406 // types (nested messages, nested enums, extensions, fields).
FixOptionsForMessage(const Descriptor & descriptor) const1407 void Generator::FixOptionsForMessage(const Descriptor& descriptor) const {
1408   // Nested messages.
1409   for (int i = 0; i < descriptor.nested_type_count(); ++i) {
1410     FixOptionsForMessage(*descriptor.nested_type(i));
1411   }
1412   // Oneofs.
1413   for (int i = 0; i < descriptor.oneof_decl_count(); ++i) {
1414     FixOptionsForOneof(*descriptor.oneof_decl(i));
1415   }
1416   // Enums.
1417   for (int i = 0; i < descriptor.enum_type_count(); ++i) {
1418     FixOptionsForEnum(*descriptor.enum_type(i));
1419   }
1420   // Fields.
1421   for (int i = 0; i < descriptor.field_count(); ++i) {
1422     const FieldDescriptor& field = *descriptor.field(i);
1423     FixOptionsForField(field);
1424   }
1425   // Extensions.
1426   for (int i = 0; i < descriptor.extension_count(); ++i) {
1427     const FieldDescriptor& field = *descriptor.extension(i);
1428     FixOptionsForField(field);
1429   }
1430   // Message option for this message.
1431   std::string message_options =
1432       OptionsValue(descriptor.options().SerializeAsString());
1433   if (message_options != "None") {
1434     std::string descriptor_name = ModuleLevelDescriptorName(descriptor);
1435     PrintDescriptorOptionsFixingCode(descriptor_name, message_options,
1436                                      printer_);
1437   }
1438 }
1439 
1440 // If a dependency forwards other files through public dependencies, let's
1441 // copy over the corresponding module aliases.
CopyPublicDependenciesAliases(const std::string & copy_from,const FileDescriptor * file) const1442 void Generator::CopyPublicDependenciesAliases(
1443     const std::string& copy_from, const FileDescriptor* file) const {
1444   for (int i = 0; i < file->public_dependency_count(); ++i) {
1445     std::string module_name = ModuleName(file->public_dependency(i)->name());
1446     std::string module_alias = ModuleAlias(file->public_dependency(i)->name());
1447     // There's no module alias in the dependent file if it was generated by
1448     // an old protoc (less than 3.0.0-alpha-1). Use module name in this
1449     // situation.
1450     printer_->Print(
1451         "try:\n"
1452         "  $alias$ = $copy_from$.$alias$\n"
1453         "except AttributeError:\n"
1454         "  $alias$ = $copy_from$.$module$\n",
1455         "alias", module_alias, "module", module_name, "copy_from", copy_from);
1456     CopyPublicDependenciesAliases(copy_from, file->public_dependency(i));
1457   }
1458 }
1459 
1460 }  // namespace python
1461 }  // namespace compiler
1462 }  // namespace protobuf
1463 }  // namespace google
1464