1 // Copyright (c) 2009-2021, Google LLC
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above copyright
9 //       notice, this list of conditions and the following disclaimer in the
10 //       documentation and/or other materials provided with the distribution.
11 //     * Neither the name of Google LLC nor the
12 //       names of its contributors may be used to endorse or promote products
13 //       derived from this software without specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 // DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
19 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 #ifndef UPB_DEF_HPP_
27 #define UPB_DEF_HPP_
28 
29 #include <cstring>
30 #include <memory>
31 #include <string>
32 #include <vector>
33 
34 #include "upb/def.h"
35 #include "upb/reflection.h"
36 #include "upb/upb.hpp"
37 
38 namespace upb {
39 
40 typedef upb_msgval MessageValue;
41 
42 class EnumDefPtr;
43 class MessageDefPtr;
44 class OneofDefPtr;
45 
46 // A upb::FieldDefPtr describes a single field in a message.  It is most often
47 // found as a part of a upb_msgdef, but can also stand alone to represent
48 // an extension.
49 class FieldDefPtr {
50  public:
FieldDefPtr()51   FieldDefPtr() : ptr_(nullptr) {}
FieldDefPtr(const upb_fielddef * ptr)52   explicit FieldDefPtr(const upb_fielddef* ptr) : ptr_(ptr) {}
53 
ptr() const54   const upb_fielddef* ptr() const { return ptr_; }
operator bool() const55   explicit operator bool() const { return ptr_ != nullptr; }
56 
57   typedef upb_fieldtype_t Type;
58   typedef upb_label_t Label;
59   typedef upb_descriptortype_t DescriptorType;
60 
full_name() const61   const char* full_name() const { return upb_fielddef_fullname(ptr_); }
62 
type() const63   Type type() const { return upb_fielddef_type(ptr_); }
label() const64   Label label() const { return upb_fielddef_label(ptr_); }
name() const65   const char* name() const { return upb_fielddef_name(ptr_); }
json_name() const66   const char* json_name() const { return upb_fielddef_jsonname(ptr_); }
number() const67   uint32_t number() const { return upb_fielddef_number(ptr_); }
is_extension() const68   bool is_extension() const { return upb_fielddef_isextension(ptr_); }
69 
70   // For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false,
71   // indicates whether this field should have lazy parsing handlers that yield
72   // the unparsed string for the submessage.
73   //
74   // TODO(haberman): I think we want to move this into a FieldOptions container
75   // when we add support for custom options (the FieldOptions struct will
76   // contain both regular FieldOptions like "lazy" *and* custom options).
lazy() const77   bool lazy() const { return upb_fielddef_lazy(ptr_); }
78 
79   // For non-string, non-submessage fields, this indicates whether binary
80   // protobufs are encoded in packed or non-packed format.
81   //
82   // TODO(haberman): see note above about putting options like this into a
83   // FieldOptions container.
packed() const84   bool packed() const { return upb_fielddef_packed(ptr_); }
85 
86   // An integer that can be used as an index into an array of fields for
87   // whatever message this field belongs to.  Guaranteed to be less than
88   // f->containing_type()->field_count().  May only be accessed once the def has
89   // been finalized.
index() const90   uint32_t index() const { return upb_fielddef_index(ptr_); }
91 
92   // The MessageDef to which this field belongs.
93   //
94   // If this field has been added to a MessageDef, that message can be retrieved
95   // directly (this is always the case for frozen FieldDefs).
96   //
97   // If the field has not yet been added to a MessageDef, you can set the name
98   // of the containing type symbolically instead.  This is mostly useful for
99   // extensions, where the extension is declared separately from the message.
100   MessageDefPtr containing_type() const;
101 
102   // The OneofDef to which this field belongs, or NULL if this field is not part
103   // of a oneof.
104   OneofDefPtr containing_oneof() const;
105 
106   // The field's type according to the enum in descriptor.proto.  This is not
107   // the same as UPB_TYPE_*, because it distinguishes between (for example)
108   // INT32 and SINT32, whereas our "type" enum does not.  This return of
109   // descriptor_type() is a function of type(), integer_format(), and
110   // is_tag_delimited().
descriptor_type() const111   DescriptorType descriptor_type() const {
112     return upb_fielddef_descriptortype(ptr_);
113   }
114 
115   // Convenient field type tests.
IsSubMessage() const116   bool IsSubMessage() const { return upb_fielddef_issubmsg(ptr_); }
IsString() const117   bool IsString() const { return upb_fielddef_isstring(ptr_); }
IsSequence() const118   bool IsSequence() const { return upb_fielddef_isseq(ptr_); }
IsPrimitive() const119   bool IsPrimitive() const { return upb_fielddef_isprimitive(ptr_); }
IsMap() const120   bool IsMap() const { return upb_fielddef_ismap(ptr_); }
121 
122   // Returns the non-string default value for this fielddef, which may either
123   // be something the client set explicitly or the "default default" (0 for
124   // numbers, empty for strings).  The field's type indicates the type of the
125   // returned value, except for enum fields that are still mutable.
126   //
127   // Requires that the given function matches the field's current type.
default_int64() const128   int64_t default_int64() const { return upb_fielddef_defaultint64(ptr_); }
default_int32() const129   int32_t default_int32() const { return upb_fielddef_defaultint32(ptr_); }
default_uint64() const130   uint64_t default_uint64() const { return upb_fielddef_defaultuint64(ptr_); }
default_uint32() const131   uint32_t default_uint32() const { return upb_fielddef_defaultuint32(ptr_); }
default_bool() const132   bool default_bool() const { return upb_fielddef_defaultbool(ptr_); }
default_float() const133   float default_float() const { return upb_fielddef_defaultfloat(ptr_); }
default_double() const134   double default_double() const { return upb_fielddef_defaultdouble(ptr_); }
135 
default_value() const136   MessageValue default_value() const { return upb_fielddef_default(ptr_); }
137 
138   // The resulting string is always NULL-terminated.  If non-NULL, the length
139   // will be stored in *len.
default_string(size_t * len) const140   const char* default_string(size_t* len) const {
141     return upb_fielddef_defaultstr(ptr_, len);
142   }
143 
144   // Returns the enum or submessage def for this field, if any.  The field's
145   // type must match (ie. you may only call enum_subdef() for fields where
146   // type() == UPB_TYPE_ENUM).
147   EnumDefPtr enum_subdef() const;
148   MessageDefPtr message_subdef() const;
149 
150  private:
151   const upb_fielddef* ptr_;
152 };
153 
154 // Class that represents a oneof.
155 class OneofDefPtr {
156  public:
OneofDefPtr()157   OneofDefPtr() : ptr_(nullptr) {}
OneofDefPtr(const upb_oneofdef * ptr)158   explicit OneofDefPtr(const upb_oneofdef* ptr) : ptr_(ptr) {}
159 
ptr() const160   const upb_oneofdef* ptr() const { return ptr_; }
operator bool() const161   explicit operator bool() const { return ptr_ != nullptr; }
162 
163   // Returns the MessageDef that contains this OneofDef.
164   MessageDefPtr containing_type() const;
165 
166   // Returns the name of this oneof.
name() const167   const char* name() const { return upb_oneofdef_name(ptr_); }
168 
169   // Returns the number of fields in the oneof.
field_count() const170   int field_count() const { return upb_oneofdef_numfields(ptr_); }
field(int i) const171   FieldDefPtr field(int i) const { return FieldDefPtr(upb_oneofdef_field(ptr_, i)); }
172 
173   // Looks up by name.
FindFieldByName(const char * name,size_t len) const174   FieldDefPtr FindFieldByName(const char* name, size_t len) const {
175     return FieldDefPtr(upb_oneofdef_ntof(ptr_, name, len));
176   }
FindFieldByName(const char * name) const177   FieldDefPtr FindFieldByName(const char* name) const {
178     return FieldDefPtr(upb_oneofdef_ntofz(ptr_, name));
179   }
180 
181   template <class T>
FindFieldByName(const T & str) const182   FieldDefPtr FindFieldByName(const T& str) const {
183     return FindFieldByName(str.c_str(), str.size());
184   }
185 
186   // Looks up by tag number.
FindFieldByNumber(uint32_t num) const187   FieldDefPtr FindFieldByNumber(uint32_t num) const {
188     return FieldDefPtr(upb_oneofdef_itof(ptr_, num));
189   }
190 
191  private:
192   const upb_oneofdef* ptr_;
193 };
194 
195 // Structure that describes a single .proto message type.
196 class MessageDefPtr {
197  public:
MessageDefPtr()198   MessageDefPtr() : ptr_(nullptr) {}
MessageDefPtr(const upb_msgdef * ptr)199   explicit MessageDefPtr(const upb_msgdef* ptr) : ptr_(ptr) {}
200 
ptr() const201   const upb_msgdef* ptr() const { return ptr_; }
operator bool() const202   explicit operator bool() const { return ptr_ != nullptr; }
203 
full_name() const204   const char* full_name() const { return upb_msgdef_fullname(ptr_); }
name() const205   const char* name() const { return upb_msgdef_name(ptr_); }
206 
207   // The number of fields that belong to the MessageDef.
field_count() const208   int field_count() const { return upb_msgdef_numfields(ptr_); }
field(int i) const209   FieldDefPtr field(int i) const { return FieldDefPtr(upb_msgdef_field(ptr_, i)); }
210 
211   // The number of oneofs that belong to the MessageDef.
oneof_count() const212   int oneof_count() const { return upb_msgdef_numoneofs(ptr_); }
oneof(int i) const213   OneofDefPtr oneof(int i) const { return OneofDefPtr(upb_msgdef_oneof(ptr_, i)); }
214 
syntax() const215   upb_syntax_t syntax() const { return upb_msgdef_syntax(ptr_); }
216 
217   // These return null pointers if the field is not found.
FindFieldByNumber(uint32_t number) const218   FieldDefPtr FindFieldByNumber(uint32_t number) const {
219     return FieldDefPtr(upb_msgdef_itof(ptr_, number));
220   }
FindFieldByName(const char * name,size_t len) const221   FieldDefPtr FindFieldByName(const char* name, size_t len) const {
222     return FieldDefPtr(upb_msgdef_ntof(ptr_, name, len));
223   }
FindFieldByName(const char * name) const224   FieldDefPtr FindFieldByName(const char* name) const {
225     return FieldDefPtr(upb_msgdef_ntofz(ptr_, name));
226   }
227 
228   template <class T>
FindFieldByName(const T & str) const229   FieldDefPtr FindFieldByName(const T& str) const {
230     return FindFieldByName(str.c_str(), str.size());
231   }
232 
FindOneofByName(const char * name,size_t len) const233   OneofDefPtr FindOneofByName(const char* name, size_t len) const {
234     return OneofDefPtr(upb_msgdef_ntoo(ptr_, name, len));
235   }
236 
FindOneofByName(const char * name) const237   OneofDefPtr FindOneofByName(const char* name) const {
238     return OneofDefPtr(upb_msgdef_ntooz(ptr_, name));
239   }
240 
241   template <class T>
FindOneofByName(const T & str) const242   OneofDefPtr FindOneofByName(const T& str) const {
243     return FindOneofByName(str.c_str(), str.size());
244   }
245 
246   // Is this message a map entry?
mapentry() const247   bool mapentry() const { return upb_msgdef_mapentry(ptr_); }
248 
249   // Return the type of well known type message. UPB_WELLKNOWN_UNSPECIFIED for
250   // non-well-known message.
wellknowntype() const251   upb_wellknowntype_t wellknowntype() const {
252     return upb_msgdef_wellknowntype(ptr_);
253   }
254 
255   // Whether is a number wrapper.
isnumberwrapper() const256   bool isnumberwrapper() const { return upb_msgdef_isnumberwrapper(ptr_); }
257 
258  private:
259   class FieldIter {
260    public:
FieldIter(const upb_msgdef * m,int i)261     explicit FieldIter(const upb_msgdef *m, int i) : m_(m), i_(i) {}
operator ++()262     void operator++() { i_++; }
263 
operator *()264     FieldDefPtr operator*() { return FieldDefPtr(upb_msgdef_field(m_, i_)); }
operator !=(const FieldIter & other)265     bool operator!=(const FieldIter& other) { return i_ != other.i_; }
operator ==(const FieldIter & other)266     bool operator==(const FieldIter& other) { return i_ == other.i_; }
267 
268    private:
269     const upb_msgdef *m_;
270     int i_;
271   };
272 
273   class FieldAccessor {
274    public:
FieldAccessor(const upb_msgdef * md)275     explicit FieldAccessor(const upb_msgdef* md) : md_(md) {}
begin()276     FieldIter begin() { return FieldIter(md_, 0); }
end()277     FieldIter end() { return FieldIter(md_, upb_msgdef_fieldcount(md_)); }
278 
279    private:
280     const upb_msgdef* md_;
281   };
282 
283   class OneofIter {
284    public:
OneofIter(const upb_msgdef * m,int i)285     explicit OneofIter(const upb_msgdef *m, int i) : m_(m), i_(i) {}
operator ++()286     void operator++() { i_++; }
287 
operator *()288     OneofDefPtr operator*() { return OneofDefPtr(upb_msgdef_oneof(m_, i_)); }
operator !=(const OneofIter & other)289     bool operator!=(const OneofIter& other) { return i_ != other.i_; }
operator ==(const OneofIter & other)290     bool operator==(const OneofIter& other) { return i_ == other.i_; }
291 
292    private:
293     const upb_msgdef *m_;
294     int i_;
295   };
296 
297   class OneofAccessor {
298    public:
OneofAccessor(const upb_msgdef * md)299     explicit OneofAccessor(const upb_msgdef* md) : md_(md) {}
begin()300     OneofIter begin() { return OneofIter(md_, 0); }
end()301     OneofIter end() { return OneofIter(md_, upb_msgdef_oneofcount(md_)); }
302 
303    private:
304     const upb_msgdef* md_;
305   };
306 
307  public:
fields() const308   FieldAccessor fields() const { return FieldAccessor(ptr()); }
oneofs() const309   OneofAccessor oneofs() const { return OneofAccessor(ptr()); }
310 
311  private:
312   const upb_msgdef* ptr_;
313 };
314 
315 class EnumDefPtr {
316  public:
EnumDefPtr()317   EnumDefPtr() : ptr_(nullptr) {}
EnumDefPtr(const upb_enumdef * ptr)318   explicit EnumDefPtr(const upb_enumdef* ptr) : ptr_(ptr) {}
319 
ptr() const320   const upb_enumdef* ptr() const { return ptr_; }
operator bool() const321   explicit operator bool() const { return ptr_ != nullptr; }
322 
full_name() const323   const char* full_name() const { return upb_enumdef_fullname(ptr_); }
name() const324   const char* name() const { return upb_enumdef_name(ptr_); }
325 
326   // The value that is used as the default when no field default is specified.
327   // If not set explicitly, the first value that was added will be used.
328   // The default value must be a member of the enum.
329   // Requires that value_count() > 0.
default_value() const330   int32_t default_value() const { return upb_enumdef_default(ptr_); }
331 
332   // Returns the number of values currently defined in the enum.  Note that
333   // multiple names can refer to the same number, so this may be greater than
334   // the total number of unique numbers.
value_count() const335   int value_count() const { return upb_enumdef_numvals(ptr_); }
336 
337   // Lookups from name to integer, returning true if found.
FindValueByName(const char * name,int32_t * num) const338   bool FindValueByName(const char* name, int32_t* num) const {
339     return upb_enumdef_ntoiz(ptr_, name, num);
340   }
341 
342   // Finds the name corresponding to the given number, or NULL if none was
343   // found.  If more than one name corresponds to this number, returns the
344   // first one that was added.
FindValueByNumber(int32_t num) const345   const char* FindValueByNumber(int32_t num) const {
346     return upb_enumdef_iton(ptr_, num);
347   }
348 
349   // Iteration over name/value pairs.  The order is undefined.
350   // Adding an enum val invalidates any iterators.
351   //
352   // TODO: make compatible with range-for, with elements as pairs?
353   class Iterator {
354    public:
Iterator(EnumDefPtr e)355     explicit Iterator(EnumDefPtr e) { upb_enum_begin(&iter_, e.ptr()); }
356 
number()357     int32_t number() { return upb_enum_iter_number(&iter_); }
name()358     const char* name() { return upb_enum_iter_name(&iter_); }
Done()359     bool Done() { return upb_enum_done(&iter_); }
Next()360     void Next() { return upb_enum_next(&iter_); }
361 
362    private:
363     upb_enum_iter iter_;
364   };
365 
366  private:
367   const upb_enumdef* ptr_;
368 };
369 
370 // Class that represents a .proto file with some things defined in it.
371 //
372 // Many users won't care about FileDefs, but they are necessary if you want to
373 // read the values of file-level options.
374 class FileDefPtr {
375  public:
FileDefPtr(const upb_filedef * ptr)376   explicit FileDefPtr(const upb_filedef* ptr) : ptr_(ptr) {}
377 
ptr() const378   const upb_filedef* ptr() const { return ptr_; }
operator bool() const379   explicit operator bool() const { return ptr_ != nullptr; }
380 
381   // Get/set name of the file (eg. "foo/bar.proto").
name() const382   const char* name() const { return upb_filedef_name(ptr_); }
383 
384   // Package name for definitions inside the file (eg. "foo.bar").
package() const385   const char* package() const { return upb_filedef_package(ptr_); }
386 
387   // Sets the php class prefix which is prepended to all php generated classes
388   // from this .proto. Default is empty.
phpprefix() const389   const char* phpprefix() const { return upb_filedef_phpprefix(ptr_); }
390 
391   // Use this option to change the namespace of php generated classes. Default
392   // is empty. When this option is empty, the package name will be used for
393   // determining the namespace.
phpnamespace() const394   const char* phpnamespace() const { return upb_filedef_phpnamespace(ptr_); }
395 
396   // Syntax for the file.  Defaults to proto2.
syntax() const397   upb_syntax_t syntax() const { return upb_filedef_syntax(ptr_); }
398 
399   // Get the list of dependencies from the file.  These are returned in the
400   // order that they were added to the FileDefPtr.
dependency_count() const401   int dependency_count() const { return upb_filedef_depcount(ptr_); }
dependency(int index) const402   const FileDefPtr dependency(int index) const {
403     return FileDefPtr(upb_filedef_dep(ptr_, index));
404   }
405 
406  private:
407   const upb_filedef* ptr_;
408 };
409 
410 // Non-const methods in upb::SymbolTable are NOT thread-safe.
411 class SymbolTable {
412  public:
SymbolTable()413   SymbolTable() : ptr_(upb_symtab_new(), upb_symtab_free) {}
SymbolTable(upb_symtab * s)414   explicit SymbolTable(upb_symtab* s) : ptr_(s, upb_symtab_free) {}
415 
ptr() const416   const upb_symtab* ptr() const { return ptr_.get(); }
ptr()417   upb_symtab* ptr() { return ptr_.get(); }
418 
419   // Finds an entry in the symbol table with this exact name.  If not found,
420   // returns NULL.
LookupMessage(const char * sym) const421   MessageDefPtr LookupMessage(const char* sym) const {
422     return MessageDefPtr(upb_symtab_lookupmsg(ptr_.get(), sym));
423   }
424 
LookupEnum(const char * sym) const425   EnumDefPtr LookupEnum(const char* sym) const {
426     return EnumDefPtr(upb_symtab_lookupenum(ptr_.get(), sym));
427   }
428 
LookupFile(const char * name) const429   FileDefPtr LookupFile(const char* name) const {
430     return FileDefPtr(upb_symtab_lookupfile(ptr_.get(), name));
431   }
432 
433   // TODO: iteration?
434 
435   // Adds the given serialized FileDescriptorProto to the pool.
AddFile(const google_protobuf_FileDescriptorProto * file_proto,Status * status)436   FileDefPtr AddFile(const google_protobuf_FileDescriptorProto* file_proto,
437                      Status* status) {
438     return FileDefPtr(
439         upb_symtab_addfile(ptr_.get(), file_proto, status->ptr()));
440   }
441 
442  private:
443   std::unique_ptr<upb_symtab, decltype(&upb_symtab_free)> ptr_;
444 };
445 
message_subdef() const446 inline MessageDefPtr FieldDefPtr::message_subdef() const {
447   return MessageDefPtr(upb_fielddef_msgsubdef(ptr_));
448 }
449 
containing_type() const450 inline MessageDefPtr FieldDefPtr::containing_type() const {
451   return MessageDefPtr(upb_fielddef_containingtype(ptr_));
452 }
453 
containing_type() const454 inline MessageDefPtr OneofDefPtr::containing_type() const {
455   return MessageDefPtr(upb_oneofdef_containingtype(ptr_));
456 }
457 
containing_oneof() const458 inline OneofDefPtr FieldDefPtr::containing_oneof() const {
459   return OneofDefPtr(upb_fielddef_containingoneof(ptr_));
460 }
461 
enum_subdef() const462 inline EnumDefPtr FieldDefPtr::enum_subdef() const {
463   return EnumDefPtr(upb_fielddef_enumsubdef(ptr_));
464 }
465 
466 }  // namespace upb
467 
468 #endif  // UPB_DEF_HPP_
469