1 
2 #include "upb/def.h"
3 
4 #include <ctype.h>
5 #include <errno.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include "google/protobuf/descriptor.upb.h"
9 
10 #include "upb/port_def.inc"
11 
/* Length-prefixed string.  newstr() stores the byte count in `len` and writes
 * a NUL terminator at str[len], so `len` does not count the terminator. */
typedef struct {
  size_t len;
  char str[1];  /* Null-terminated string data follows. */
} str_t;
16 
newstr(upb_alloc * alloc,const char * data,size_t len)17 static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) {
18   str_t *ret = upb_malloc(alloc, sizeof(*ret) + len);
19   if (!ret) return NULL;
20   ret->len = len;
21   if (len) memcpy(ret->str, data, len);
22   ret->str[len] = '\0';
23   return ret;
24 }
25 
/* Definition of a single field.  Most members are exposed read-only through
 * the upb_fielddef_*() accessors below. */
struct upb_fielddef {
  const upb_filedef *file;
  const upb_msgdef *msgdef;  /* Containing message. */
  const char *full_name;     /* upb_fielddef_name() is the part after the last '.'. */
  const char *json_name;
  /* Default value.  Which member is meaningful depends on the field type; see
   * the upb_fielddef_default*() accessors. */
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t *str;
  } defaultval;
  const upb_oneofdef *oneof;  /* Containing oneof; may be NULL. */
  /* Sub-definition.  upb_fielddef_msgsubdef()/upb_fielddef_enumsubdef() return
   * the matching member only when the field type is message/enum. */
  union {
    const upb_msgdef *msgdef;
    const upb_enumdef *enumdef;
    /* NOTE(review): presumably holds the descriptor until the sub-def is
     * resolved -- confirm against the builder code. */
    const google_protobuf_FieldDescriptorProto *unresolved;
  } sub;
  uint32_t number_;
  uint16_t index_;        /* Assigned by assign_msg_indices() in sorted order. */
  uint16_t layout_index;  /* Index into msgdef->layout->fields. */
  uint32_t selector_base;  /* Used to index into a upb::Handlers table. */
  bool is_extension_;
  bool lazy_;
  bool packed_;
  bool proto3_optional_;  /* Read by upb_oneofdef_issynthetic(). */
  upb_descriptortype_t type_;
  upb_label_t label_;
};
56 
/* Definition of a message type. */
struct upb_msgdef {
  const upb_msglayout *layout;
  const upb_filedef *file;
  const char *full_name;
  /* Both counts are computed by assign_msg_indices(). */
  uint32_t selector_count;
  uint32_t submsg_field_count;

  /* Tables for looking up fields by number and name. */
  upb_inttable itof;
  /* Values in `ntof` are tagged pointers (see upb_deftype_t): the lookup
   * functions below unpack fields, oneofs and JSON-name entries from it. */
  upb_strtable ntof;

  const upb_fielddef *fields;
  const upb_oneofdef *oneofs;
  int field_count;
  int oneof_count;
  int real_oneof_count;  /* Set by check_oneofs(): oneofs before the first synthetic one. */

  /* Is this a map-entry message? */
  bool map_entry;
  upb_wellknowntype_t well_known_type;  /* Set by assign_msg_wellknowntype(). */

  /* TODO(haberman): proper extension ranges (there can be multiple). */
};
80 
/* Definition of an enum type. */
struct upb_enumdef {
  const upb_filedef *file;
  const char *full_name;
  upb_strtable ntoi;  /* Value name -> int32 number (see upb_enumdef_ntoi()). */
  upb_inttable iton;  /* Number -> C string name (see upb_enumdef_iton()). */
  int32_t defaultval;
};
88 
/* Definition of a oneof within a message. */
struct upb_oneofdef {
  const upb_msgdef *parent;  /* Containing message. */
  const char *full_name;
  uint32_t index;     /* Position within the parent's oneof array; set by check_oneofs(). */
  upb_strtable ntof;  /* Field name -> upb_fielddef pointer. */
  upb_inttable itof;  /* Field number -> upb_fielddef pointer. */
};
96 
/* Definition of a .proto file. */
struct upb_filedef {
  const char *name;
  const char *package;
  const char *phpprefix;
  const char *phpnamespace;
  upb_syntax_t syntax;  /* Consulted by upb_fielddef_haspresence() and upb_msgdef_syntax(). */

  /* NOTE(review): presumably arrays whose lengths are the matching *_count
   * members below -- confirm against the code that fills them in. */
  const upb_filedef **deps;
  const upb_msgdef *msgs;
  const upb_enumdef *enums;
  const upb_fielddef *exts;

  int dep_count;
  int msg_count;
  int enum_count;
  int ext_count;
};
114 
/* Symbol table.
 * NOTE(review): `arena` presumably owns the memory of the stored defs --
 * confirm against the symtab construction code. */
struct upb_symtab {
  upb_arena *arena;
  upb_strtable syms;  /* full_name -> packed def ptr */
  upb_strtable files;  /* file_name -> upb_filedef* */
};
120 
/* Inside a symtab we store tagged pointers to specific def types.
 *
 * pack_def()/unpack_def() keep the tag in the low two bits of the pointer, so
 * every value here must fit in two bits.  Values 1 and 2 are reused because,
 * per the comments below, each meaning applies to a different table. */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
133 
/* Decodes a tagged pointer: returns the pointer with its two low tag bits
 * cleared when the tag equals `type`, otherwise NULL. */
static const void *unpack_def(upb_value v, upb_deftype_t type) {
  const uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);
  if ((tagged & 3) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~3);
}
138 
pack_def(const void * ptr,upb_deftype_t type)139 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
140   uintptr_t num = (uintptr_t)ptr | type;
141   return upb_value_constptr((const void*)num);
142 }
143 
144 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
/* Returns true when `c` lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return c <= high;
}
148 
/* Returns true for ASCII letters and for '_'. */
static bool upb_isletter(char c) {
  if (c == '_') {
    return true;
  }
  return upb_isbetween(c, 'a', 'z') || upb_isbetween(c, 'A', 'Z');
}
152 
/* Returns true for anything upb_isletter() accepts and for ASCII digits. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
156 
/* Checks that `name` is a well-formed identifier.
 *
 * Each '.'-separated component must be non-empty, start with a letter or '_',
 * and continue with letters, digits or '_'.  When `full` is false no '.' is
 * permitted at all.
 *
 * Returns true if the name is valid.  Otherwise returns false and records an
 * error message in `s`.
 *
 * `name` is a (pointer, size) view, so its data is printed with an explicit
 * length ("%.*s") rather than "%s", which would read past the end of a view
 * that is not NUL-terminated. */
static bool upb_isident(upb_strview name, bool full, upb_status *s) {
  const char *str = name.data;
  size_t len = name.size;
  bool start = true;
  size_t i;
  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%.*s)", (int)len,
                           str);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s,
            "invalid name: path components must start with a letter (%.*s)",
            (int)len, str);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(
            s, "invalid name: non-alphanumeric character (%.*s)", (int)len,
            str);
        return false;
      }
    }
  }
  if (start) {
    /* Empty name or trailing '.': report it instead of failing silently. */
    upb_status_seterrf(s, "invalid name: empty part (%.*s)", (int)len, str);
    return false;
  }
  return true;
}
188 
/* Returns the portion of `fullname` after its last '.', or `fullname` itself
 * when it contains no '.'.  A NULL input yields NULL.  The result points into
 * `fullname`; nothing is allocated. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) {
    return NULL;
  }
  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
202 
203 /* All submessage fields are lower than all other fields.
204  * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)205 uint32_t field_rank(const upb_fielddef *f) {
206   uint32_t ret = upb_fielddef_number(f);
207   const uint32_t high_bit = 1 << 30;
208   UPB_ASSERT(ret < high_bit);
209   if (!upb_fielddef_issubmsg(f))
210     ret |= high_bit;
211   return ret;
212 }
213 
cmp_fields(const void * p1,const void * p2)214 int cmp_fields(const void *p1, const void *p2) {
215   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
216   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
217   return field_rank(f1) - field_rank(f2);
218 }
219 
220 /* A few implementation details of handlers.  We put these here to avoid
221  * a def -> handlers dependency. */
222 
/* Selector numbering in assign_msg_indices() starts at this value plus the
 * message's submessage field count.  Warning: also in upb/handlers.h. */
#define UPB_STATIC_SELECTOR_COUNT 3
224 
/* Offset added to a field's first selector to obtain its selector base:
 * 2 for repeated fields, 0 otherwise. */
static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
228 
/* Number of selectors field `f` occupies. */
static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;
  if (upb_fielddef_isseq(f)) {
    count += 2; /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2; /* [STRING]/STARTSTR/ENDSTR */
  }
  /* A submessage adds nothing for ENDSUBMSG (STARTSUBMSG is at table
   * beginning); only a lazy one adds STARTSTR/ENDSTR/STRING. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3;
  }
  return count;
}
243 
/* Records an out-of-memory error in `status`. */
static void upb_status_setoom(upb_status *status) {
  static const char oom_msg[] = "out of memory";
  upb_status_seterrmsg(status, oom_msg);
}
247 
/* Assigns index_ and selector_base to every field of `m`, and computes the
 * message's selector_count and submsg_field_count.
 *
 * Fields are ordered with cmp_fields().  upb internally relies on
 * UPB_TYPE_MESSAGE fields having the lowest indexes, but we do not publicly
 * guarantee this.
 *
 * Returns false, with an out-of-memory status in `s`, if the temporary sort
 * array cannot be allocated. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  upb_msg_field_iter iter;
  upb_fielddef **sorted;
  uint32_t next_selector;
  int idx;
  const int count = upb_msgdef_numfields(m);

  if (count == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return true;
  }

  sorted = upb_gmalloc(count * sizeof(*sorted));
  if (sorted == NULL) {
    upb_status_setoom(s);
    return false;
  }

  /* Collect every field and count the submessage fields along the way. */
  m->submsg_field_count = 0;
  idx = 0;
  upb_msg_field_begin(&iter, m);
  while (!upb_msg_field_done(&iter)) {
    upb_fielddef *f = upb_msg_iter_field(&iter);
    UPB_ASSERT(f->msgdef == m);
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    sorted[idx] = f;
    idx++;
    upb_msg_field_next(&iter);
  }

  qsort(sorted, count, sizeof(*sorted), cmp_fields);

  /* Hand out indexes and selector ranges in sorted order. */
  next_selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (idx = 0; idx < count; idx++) {
    upb_fielddef *f = sorted[idx];
    f->index_ = idx;
    f->selector_base = next_selector + upb_handlers_selectorbaseoffset(f);
    next_selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = next_selector;

  upb_gfree(sorted);
  return true;
}
295 
/* Numbers the oneofs of `m` by array position and verifies that no
 * non-synthetic oneof follows a synthetic one.  On success sets
 * m->real_oneof_count to the number of oneofs preceding the first synthetic
 * one (or to oneof_count if there is none) and returns true.  On an ordering
 * violation records an error in `s` and returns false. */
static bool check_oneofs(upb_msgdef *m, upb_status *s) {
  upb_oneofdef *oneofs = (upb_oneofdef*)m->oneofs;
  int first_synth = -1;
  int idx;

  for (idx = 0; idx < m->oneof_count; idx++) {
    upb_oneofdef *o = &oneofs[idx];
    o->index = idx;

    if (upb_oneofdef_issynthetic(o)) {
      if (first_synth == -1) {
        first_synth = idx;
      }
      continue;
    }

    if (first_synth != -1) {
      upb_status_seterrf(
          s, "Synthetic oneofs must be after all other oneofs: %s",
          upb_oneofdef_name(o));
      return false;
    }
  }

  m->real_oneof_count = (first_synth == -1) ? m->oneof_count : first_synth;
  return true;
}
326 
/* Sets m->well_known_type from the message's full name.  Names not listed in
 * the table, and a NULL name, map to UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *name;
    upb_wellknowntype_t type;
  } known[] = {
    {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
    {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
    {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
    {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
    {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
    {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
    {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
    {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
    {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
    {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
    {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
    {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
    {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
    {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
    {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
    {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT}
  };
  const char *name = upb_msgdef_fullname(m);
  size_t i;

  m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
  if (name == NULL) {
    return;
  }
  for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
    if (strcmp(name, known[i].name) == 0) {
      m->well_known_type = known[i].type;
      return;
    }
  }
}
369 
370 
371 /* upb_enumdef ****************************************************************/
372 
upb_enumdef_fullname(const upb_enumdef * e)373 const char *upb_enumdef_fullname(const upb_enumdef *e) {
374   return e->full_name;
375 }
376 
upb_enumdef_name(const upb_enumdef * e)377 const char *upb_enumdef_name(const upb_enumdef *e) {
378   return shortdefname(e->full_name);
379 }
380 
upb_enumdef_file(const upb_enumdef * e)381 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
382   return e->file;
383 }
384 
upb_enumdef_default(const upb_enumdef * e)385 int32_t upb_enumdef_default(const upb_enumdef *e) {
386   UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
387   return e->defaultval;
388 }
389 
upb_enumdef_numvals(const upb_enumdef * e)390 int upb_enumdef_numvals(const upb_enumdef *e) {
391   return (int)upb_strtable_count(&e->ntoi);
392 }
393 
/* Starts iteration over the enum's values.  The name table (ntoi) is walked,
 * to account for duplicate numbers. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *names = &e->ntoi;
  upb_strtable_begin(i, names);
}
398 
/* Advances the enum value iterator by one entry. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once the enum value iterator has passed the last entry. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
401 
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)402 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
403                       size_t len, int32_t *num) {
404   upb_value v;
405   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
406     return false;
407   }
408   if (num) *num = upb_value_getint32(v);
409   return true;
410 }
411 
upb_enumdef_iton(const upb_enumdef * def,int32_t num)412 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
413   upb_value v;
414   return upb_inttable_lookup32(&def->iton, num, &v) ?
415       upb_value_getcstr(v) : NULL;
416 }
417 
/* Returns the key data (value name) of the entry the iterator is on. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *key_data = upb_strtable_iter_key(iter).data;
  return key_data;
}
421 
/* Returns the number of the enum value the iterator is on. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  upb_value val = upb_strtable_iter_value(iter);
  return upb_value_getint32(val);
}
425 
426 
427 /* upb_fielddef ***************************************************************/
428 
upb_fielddef_fullname(const upb_fielddef * f)429 const char *upb_fielddef_fullname(const upb_fielddef *f) {
430   return f->full_name;
431 }
432 
upb_fielddef_type(const upb_fielddef * f)433 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
434   switch (f->type_) {
435     case UPB_DESCRIPTOR_TYPE_DOUBLE:
436       return UPB_TYPE_DOUBLE;
437     case UPB_DESCRIPTOR_TYPE_FLOAT:
438       return UPB_TYPE_FLOAT;
439     case UPB_DESCRIPTOR_TYPE_INT64:
440     case UPB_DESCRIPTOR_TYPE_SINT64:
441     case UPB_DESCRIPTOR_TYPE_SFIXED64:
442       return UPB_TYPE_INT64;
443     case UPB_DESCRIPTOR_TYPE_INT32:
444     case UPB_DESCRIPTOR_TYPE_SFIXED32:
445     case UPB_DESCRIPTOR_TYPE_SINT32:
446       return UPB_TYPE_INT32;
447     case UPB_DESCRIPTOR_TYPE_UINT64:
448     case UPB_DESCRIPTOR_TYPE_FIXED64:
449       return UPB_TYPE_UINT64;
450     case UPB_DESCRIPTOR_TYPE_UINT32:
451     case UPB_DESCRIPTOR_TYPE_FIXED32:
452       return UPB_TYPE_UINT32;
453     case UPB_DESCRIPTOR_TYPE_ENUM:
454       return UPB_TYPE_ENUM;
455     case UPB_DESCRIPTOR_TYPE_BOOL:
456       return UPB_TYPE_BOOL;
457     case UPB_DESCRIPTOR_TYPE_STRING:
458       return UPB_TYPE_STRING;
459     case UPB_DESCRIPTOR_TYPE_BYTES:
460       return UPB_TYPE_BYTES;
461     case UPB_DESCRIPTOR_TYPE_GROUP:
462     case UPB_DESCRIPTOR_TYPE_MESSAGE:
463       return UPB_TYPE_MESSAGE;
464   }
465   UPB_UNREACHABLE();
466 }
467 
upb_fielddef_descriptortype(const upb_fielddef * f)468 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
469   return f->type_;
470 }
471 
upb_fielddef_index(const upb_fielddef * f)472 uint32_t upb_fielddef_index(const upb_fielddef *f) {
473   return f->index_;
474 }
475 
upb_fielddef_label(const upb_fielddef * f)476 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
477   return f->label_;
478 }
479 
upb_fielddef_number(const upb_fielddef * f)480 uint32_t upb_fielddef_number(const upb_fielddef *f) {
481   return f->number_;
482 }
483 
upb_fielddef_isextension(const upb_fielddef * f)484 bool upb_fielddef_isextension(const upb_fielddef *f) {
485   return f->is_extension_;
486 }
487 
upb_fielddef_lazy(const upb_fielddef * f)488 bool upb_fielddef_lazy(const upb_fielddef *f) {
489   return f->lazy_;
490 }
491 
upb_fielddef_packed(const upb_fielddef * f)492 bool upb_fielddef_packed(const upb_fielddef *f) {
493   return f->packed_;
494 }
495 
upb_fielddef_name(const upb_fielddef * f)496 const char *upb_fielddef_name(const upb_fielddef *f) {
497   return shortdefname(f->full_name);
498 }
499 
upb_fielddef_jsonname(const upb_fielddef * f)500 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
501   return f->json_name;
502 }
503 
upb_fielddef_selectorbase(const upb_fielddef * f)504 uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
505   return f->selector_base;
506 }
507 
upb_fielddef_file(const upb_fielddef * f)508 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
509   return f->file;
510 }
511 
upb_fielddef_containingtype(const upb_fielddef * f)512 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
513   return f->msgdef;
514 }
515 
upb_fielddef_containingoneof(const upb_fielddef * f)516 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
517   return f->oneof;
518 }
519 
upb_fielddef_realcontainingoneof(const upb_fielddef * f)520 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
521   if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
522   return f->oneof;
523 }
524 
/* Currently a no-op: both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(ctype);
  UPB_UNUSED(f);
}
529 
upb_fielddef_defaultint64(const upb_fielddef * f)530 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
531   chkdefaulttype(f, UPB_TYPE_INT64);
532   return f->defaultval.sint;
533 }
534 
upb_fielddef_defaultint32(const upb_fielddef * f)535 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
536   chkdefaulttype(f, UPB_TYPE_INT32);
537   return (int32_t)f->defaultval.sint;
538 }
539 
upb_fielddef_defaultuint64(const upb_fielddef * f)540 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
541   chkdefaulttype(f, UPB_TYPE_UINT64);
542   return f->defaultval.uint;
543 }
544 
upb_fielddef_defaultuint32(const upb_fielddef * f)545 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
546   chkdefaulttype(f, UPB_TYPE_UINT32);
547   return (uint32_t)f->defaultval.uint;
548 }
549 
upb_fielddef_defaultbool(const upb_fielddef * f)550 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
551   chkdefaulttype(f, UPB_TYPE_BOOL);
552   return f->defaultval.boolean;
553 }
554 
upb_fielddef_defaultfloat(const upb_fielddef * f)555 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
556   chkdefaulttype(f, UPB_TYPE_FLOAT);
557   return f->defaultval.flt;
558 }
559 
upb_fielddef_defaultdouble(const upb_fielddef * f)560 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
561   chkdefaulttype(f, UPB_TYPE_DOUBLE);
562   return f->defaultval.dbl;
563 }
564 
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)565 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
566   str_t *str = f->defaultval.str;
567   UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
568          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
569          upb_fielddef_type(f) == UPB_TYPE_ENUM);
570   if (str) {
571     if (len) *len = str->len;
572     return str->str;
573   } else {
574     if (len) *len = 0;
575     return NULL;
576   }
577 }
578 
upb_fielddef_msgsubdef(const upb_fielddef * f)579 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
580   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL;
581 }
582 
upb_fielddef_enumsubdef(const upb_fielddef * f)583 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
584   return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL;
585 }
586 
upb_fielddef_layout(const upb_fielddef * f)587 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
588   return &f->msgdef->layout->fields[f->layout_index];
589 }
590 
upb_fielddef_issubmsg(const upb_fielddef * f)591 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
592   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
593 }
594 
upb_fielddef_isstring(const upb_fielddef * f)595 bool upb_fielddef_isstring(const upb_fielddef *f) {
596   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
597          upb_fielddef_type(f) == UPB_TYPE_BYTES;
598 }
599 
upb_fielddef_isseq(const upb_fielddef * f)600 bool upb_fielddef_isseq(const upb_fielddef *f) {
601   return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
602 }
603 
/* Returns true if the field is neither a string/bytes field nor a
 * submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
607 
/* Returns true if the field is a repeated submessage whose message type is
 * flagged as a map entry. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) {
    return false;
  }
  if (!upb_fielddef_issubmsg(f)) {
    return false;
  }
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
612 
upb_fielddef_hassubdef(const upb_fielddef * f)613 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
614   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
615 }
616 
upb_fielddef_haspresence(const upb_fielddef * f)617 bool upb_fielddef_haspresence(const upb_fielddef *f) {
618   if (upb_fielddef_isseq(f)) return false;
619   return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
620          f->file->syntax == UPB_SYNTAX_PROTO2;
621 }
622 
/* Returns true when `x` lies in the inclusive range [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
626 
/* Accepts label values 1 through 3 inclusive. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* Accepts type values 1 through 11 inclusive. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* Accepts integer-format values 1 through 3 inclusive. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
630 
/* Accepts descriptor type values 1 through 18 inclusive. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const bool in_range = between(type, 1, 18);
  return in_range;
}
634 
635 /* upb_msgdef *****************************************************************/
636 
upb_msgdef_fullname(const upb_msgdef * m)637 const char *upb_msgdef_fullname(const upb_msgdef *m) {
638   return m->full_name;
639 }
640 
upb_msgdef_file(const upb_msgdef * m)641 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
642   return m->file;
643 }
644 
upb_msgdef_name(const upb_msgdef * m)645 const char *upb_msgdef_name(const upb_msgdef *m) {
646   return shortdefname(m->full_name);
647 }
648 
upb_msgdef_syntax(const upb_msgdef * m)649 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
650   return m->file->syntax;
651 }
652 
upb_msgdef_selectorcount(const upb_msgdef * m)653 size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
654   return m->selector_count;
655 }
656 
upb_msgdef_submsgfieldcount(const upb_msgdef * m)657 uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
658   return m->submsg_field_count;
659 }
660 
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)661 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
662   upb_value val;
663   return upb_inttable_lookup32(&m->itof, i, &val) ?
664       upb_value_getconstptr(val) : NULL;
665 }
666 
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)667 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
668                                     size_t len) {
669   upb_value val;
670 
671   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
672     return NULL;
673   }
674 
675   return unpack_def(val, UPB_DEFTYPE_FIELD);
676 }
677 
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)678 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
679                                     size_t len) {
680   upb_value val;
681 
682   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
683     return NULL;
684   }
685 
686   return unpack_def(val, UPB_DEFTYPE_ONEOF);
687 }
688 
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)689 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
690                            const upb_fielddef **f, const upb_oneofdef **o) {
691   upb_value val;
692 
693   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
694     return false;
695   }
696 
697   *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
698   *f = unpack_def(val, UPB_DEFTYPE_FIELD);
699   return *o || *f;  /* False if this was a JSON name. */
700 }
701 
upb_msgdef_lookupjsonname(const upb_msgdef * m,const char * name,size_t len)702 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
703                                               const char *name, size_t len) {
704   upb_value val;
705   const upb_fielddef* f;
706 
707   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
708     return NULL;
709   }
710 
711   f = unpack_def(val, UPB_DEFTYPE_FIELD);
712   if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
713 
714   return f;
715 }
716 
upb_msgdef_numfields(const upb_msgdef * m)717 int upb_msgdef_numfields(const upb_msgdef *m) {
718   return m->field_count;
719 }
720 
upb_msgdef_numoneofs(const upb_msgdef * m)721 int upb_msgdef_numoneofs(const upb_msgdef *m) {
722   return m->oneof_count;
723 }
724 
upb_msgdef_numrealoneofs(const upb_msgdef * m)725 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
726   return m->real_oneof_count;
727 }
728 
upb_msgdef_layout(const upb_msgdef * m)729 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
730   return m->layout;
731 }
732 
_upb_msgdef_field(const upb_msgdef * m,int i)733 const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) {
734   if (i >= m->field_count) return NULL;
735   return &m->fields[i];
736 }
737 
upb_msgdef_mapentry(const upb_msgdef * m)738 bool upb_msgdef_mapentry(const upb_msgdef *m) {
739   return m->map_entry;
740 }
741 
upb_msgdef_wellknowntype(const upb_msgdef * m)742 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
743   return m->well_known_type;
744 }
745 
upb_msgdef_isnumberwrapper(const upb_msgdef * m)746 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
747   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
748   return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
749          type <= UPB_WELLKNOWN_UINT32VALUE;
750 }
751 
upb_msgdef_iswrapper(const upb_msgdef * m)752 bool upb_msgdef_iswrapper(const upb_msgdef *m) {
753   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
754   return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
755          type <= UPB_WELLKNOWN_BOOLVALUE;
756 }
757 
/* Starts iteration over the message's fields via its number table (itof). */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  const upb_inttable *by_number = &m->itof;
  upb_inttable_begin(iter, by_number);
}
761 
/* Advances the field iterator by one entry. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
763 
/* Returns true once the field iterator has passed the last entry. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
767 
upb_msg_iter_field(const upb_msg_field_iter * iter)768 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
769   return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
770 }
771 
/* Puts the field iterator into its "done" state; delegates to the underlying
 * inttable iterator. */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter)
{
  upb_inttable_iter_setdone(iter);
}
775 
/* Returns whether two field iterators compare equal according to the
 * underlying inttable iterator comparison. */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
                                const upb_msg_field_iter * iter2) {
  const bool same = upb_inttable_iter_isequal(iter1, iter2);
  return same;
}
780 
/* Starts iteration over the message's oneofs.  The name table also holds
 * non-oneof entries, so the iterator is advanced to the first oneof entry (or
 * to the end if there is none). */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  upb_strtable_begin(iter, &m->ntof);
  for (;;) {
    if (upb_strtable_done(iter)) {
      break;
    }
    if (unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
      break;
    }
    upb_strtable_next(iter);
  }
}
789 
/* Advances to the next oneof entry, skipping table entries that are not
 * oneofs.  Always moves forward by at least one entry. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  upb_strtable_next(iter);
  while (!upb_strtable_done(iter) &&
         !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
    upb_strtable_next(iter);
  }
}
797 
/* Returns true once the oneof iterator has passed the last table entry. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  const bool finished = upb_strtable_done(iter);
  return finished;
}
801 
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)802 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
803   return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
804 }
805 
/* Puts the oneof iterator into its "done" state; delegates to the underlying
 * strtable iterator. */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter)
{
  upb_strtable_iter_setdone(iter);
}
809 
/* Returns whether two oneof iterators compare equal according to the
 * underlying strtable iterator comparison. */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  const bool same = upb_strtable_iter_isequal(iter1, iter2);
  return same;
}
814 
815 /* upb_oneofdef ***************************************************************/
816 
upb_oneofdef_name(const upb_oneofdef * o)817 const char *upb_oneofdef_name(const upb_oneofdef *o) {
818   return shortdefname(o->full_name);
819 }
820 
upb_oneofdef_containingtype(const upb_oneofdef * o)821 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
822   return o->parent;
823 }
824 
upb_oneofdef_numfields(const upb_oneofdef * o)825 int upb_oneofdef_numfields(const upb_oneofdef *o) {
826   return (int)upb_strtable_count(&o->ntof);
827 }
828 
upb_oneofdef_index(const upb_oneofdef * o)829 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
830   return o->index;
831 }
832 
upb_oneofdef_issynthetic(const upb_oneofdef * o)833 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
834   upb_inttable_iter iter;
835   const upb_fielddef *f;
836   upb_inttable_begin(&iter, &o->itof);
837   if (upb_oneofdef_numfields(o) != 1) return false;
838   f = upb_value_getptr(upb_inttable_iter_value(&iter));
839   UPB_ASSERT(f);
840   return f->proto3_optional_;
841 }
842 
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)843 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
844                                       const char *name, size_t length) {
845   upb_value val;
846   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
847       upb_value_getptr(val) : NULL;
848 }
849 
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)850 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
851   upb_value val;
852   return upb_inttable_lookup32(&o->itof, num, &val) ?
853       upb_value_getptr(val) : NULL;
854 }
855 
/* Starts iteration over the oneof's number -> field table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *members = &o->itof;
  upb_inttable_begin(iter, members);
}
859 
/* Advances the oneof field iterator; forwards to upb_inttable_next(). */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
}
863 
/* Returns whatever upb_inttable_done() reports for the iterator. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  const bool finished = upb_inttable_done(iter);
  return finished;
}
867 
upb_oneof_iter_field(const upb_oneof_iter * iter)868 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
869   return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
870 }
871 
/* Marks the iterator as done; forwards to upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
875 
876 /* Dynamic Layout Generation. *************************************************/
877 
/* Divides n by d, rounding the quotient up.  d must be non-zero. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + d - 1;
  return biased / d;
}
881 
/* Returns the number of bytes used to store a single (non-repeated) value of
 * the given field type inside a message. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    /* One byte. */
    case UPB_TYPE_BOOL:
      return 1;

    /* Four bytes. */
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;

    /* Eight bytes. */
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;

    /* Sub-messages are stored as a pointer. */
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);

    /* Strings and bytes are stored as a string view. */
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
  }
  UPB_UNREACHABLE();
}
903 
upb_msg_fielddefsize(const upb_fielddef * f)904 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
905   if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
906     upb_map_entry ent;
907     UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
908     return sizeof(ent.k);
909   } else if (upb_fielddef_isseq(f)) {
910     return sizeof(void*);
911   } else {
912     return upb_msgval_sizeof(upb_fielddef_type(f));
913   }
914 }
915 
/* Reserves |size| bytes at the end of layout |l|, first aligning the current
 * layout size up to |size|.  Returns the offset of the reserved region and
 * grows l->size past it. */
static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  uint32_t offset;

  /* Align in place so the stored size and the returned offset agree. */
  l->size = UPB_ALIGN_UP(l->size, size);
  offset = l->size;

  l->size += size;
  return offset;
}
924 
field_number_cmp(const void * p1,const void * p2)925 static int field_number_cmp(const void *p1, const void *p2) {
926   const upb_msglayout_field *f1 = p1;
927   const upb_msglayout_field *f2 = p2;
928   return f1->number - f2->number;
929 }
930 
/* For each of the message's fields, looks the field def up by the number
 * stored in fields[i] and records i as that def's layout_index. */
static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) {
  const int count = upb_msgdef_numfields(m);
  int pos = 0;

  while (pos < count) {
    upb_fielddef *def = (upb_fielddef*)upb_msgdef_itof(m, fields[pos].number);
    UPB_ASSERT(def);
    def->layout_index = pos;
    pos++;
  }
}
940 
/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
 * It computes a dynamic layout for all of the fields in |m|.
 *
 * The layout object itself (m->layout) must already be allocated; it is
 * zeroed and filled in here.  The field and submsg arrays are allocated from
 * the symtab's arena.
 *
 * Returns false if one of those allocations fails, true otherwise. */
static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
  /* m->layout is const-qualified in the def; cast so we can populate it. */
  upb_msglayout *l = (upb_msglayout*)m->layout;
  upb_msg_field_iter it;
  upb_msg_oneof_iter oit;
  size_t hasbit;
  size_t submsg_count = m->submsg_field_count;
  const upb_msglayout **submsgs;
  upb_msglayout_field *fields;
  upb_alloc *alloc = upb_arena_alloc(symtab->arena);

  memset(l, 0, sizeof(*l));

  fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields));
  submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs));

  /* A NULL result is only treated as failure when a non-zero number of
   * elements was requested. */
  if ((!fields && upb_msgdef_numfields(m)) ||
      (!submsgs && submsg_count)) {
    /* OOM. */
    return false;
  }

  l->field_count = upb_msgdef_numfields(m);
  l->fields = fields;
  l->submsgs = submsgs;

  if (upb_msgdef_mapentry(m)) {
    /* TODO(haberman): refactor this method so this special case is more
     * elegant. */
    /* NOTE(review): this branch writes fields[0] and fields[1] and passes
     * |key| and |val| to accessors without checking them; it appears to
     * assume the message has fields numbered 1 and 2 and at least two
     * fields in total -- confirm this is validated by the caller. */
    const upb_fielddef *key = upb_msgdef_itof(m, 1);
    const upb_fielddef *val = upb_msgdef_itof(m, 2);
    fields[0].number = 1;
    fields[1].number = 2;
    fields[0].label = UPB_LABEL_OPTIONAL;
    fields[1].label = UPB_LABEL_OPTIONAL;
    fields[0].presence = 0;
    fields[1].presence = 0;
    fields[0].descriptortype = upb_fielddef_descriptortype(key);
    fields[1].descriptortype = upb_fielddef_descriptortype(val);
    /* Key and value each occupy one upb_strview-sized slot. */
    fields[0].offset = 0;
    fields[1].offset = sizeof(upb_strview);
    fields[1].submsg_index = 0;

    if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
      submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
    }

    l->field_count = 2;
    l->size = 2 * sizeof(upb_strview);
    l->size = UPB_ALIGN_UP(l->size, 8);
    return true;
  }

  /* Allocate data offsets in three stages:
   *
   * 1. hasbits.
   * 2. regular fields.
   * 3. oneof fields.
   *
   * OPT: There is a lot of room for optimization here to minimize the size.
   */

  /* Allocate hasbits and set basic field attributes. */
  /* submsg_count is reused from here on as the next free slot in submsgs. */
  submsg_count = 0;
  for (upb_msg_field_begin(&it, m), hasbit = 0;
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    upb_fielddef* f = upb_msg_iter_field(&it);
    /* At this stage the layout array is indexed by the field def's index;
     * it is sorted by field number at the end of the function. */
    upb_msglayout_field *field = &fields[upb_fielddef_index(f)];

    field->number = upb_fielddef_number(f);
    field->descriptortype = upb_fielddef_descriptortype(f);
    field->label = upb_fielddef_label(f);

    if (field->descriptortype == UPB_DTYPE_STRING &&
        f->file->syntax == UPB_SYNTAX_PROTO2) {
      /* See TableDescriptorType() in upbc/generator.cc for details and
       * rationale. */
      field->descriptortype = UPB_DTYPE_BYTES;
    }

    /* Map and packed fields get dedicated layout labels. */
    if (upb_fielddef_ismap(f)) {
      field->label = _UPB_LABEL_MAP;
    } else if (upb_fielddef_packed(f)) {
      field->label = _UPB_LABEL_PACKED;
    }

    if (upb_fielddef_issubmsg(f)) {
      const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
      field->submsg_index = submsg_count++;
      submsgs[field->submsg_index] = subm->layout;
    }

    if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
      /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
       * table. This wastes one hasbit, but we don't worry about it for now. */
      field->presence = ++hasbit;
    } else {
      field->presence = 0;
    }
  }

  /* Account for space used by hasbits. */
  l->size = div_round_up(hasbit, 8);

  /* Allocate non-oneof fields. */
  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    const upb_fielddef* f = upb_msg_iter_field(&it);
    size_t field_size = upb_msg_fielddefsize(f);
    size_t index = upb_fielddef_index(f);

    if (upb_fielddef_realcontainingoneof(f)) {
      /* Oneofs are handled separately below. */
      continue;
    }

    fields[index].offset = upb_msglayout_place(l, field_size);
  }

  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
   * and space for the actual data. */
  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
       upb_msg_oneof_next(&oit)) {
    const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
    upb_oneof_iter fit;

    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
    size_t field_size = 0;
    uint32_t case_offset;
    uint32_t data_offset;

    /* Fields of synthetic oneofs were already placed as regular fields. */
    if (upb_oneofdef_issynthetic(o)) continue;

    /* Calculate field size: the max of all field sizes. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
    }

    /* Align and allocate the case word, then the shared data slot. */
    case_offset = upb_msglayout_place(l, case_size);
    data_offset = upb_msglayout_place(l, field_size);

    /* Every member shares the data slot; presence stores the bitwise
     * complement of the case offset. */
    for (upb_oneof_begin(&fit, o);
         !upb_oneof_done(&fit);
         upb_oneof_next(&fit)) {
      const upb_fielddef* f = upb_oneof_iter_field(&fit);
      fields[upb_fielddef_index(f)].offset = data_offset;
      fields[upb_fielddef_index(f)].presence = ~case_offset;
    }
  }

  /* Size of the entire structure should be a multiple of its greatest
   * alignment.  TODO: track overall alignment for real? */
  l->size = UPB_ALIGN_UP(l->size, 8);

  /* Sort fields by number. */
  qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
  /* Sorting changed positions, so record each field's new position on its
   * field def. */
  assign_layout_indices(m, fields);

  return true;
}
1107 
1108 /* Code to build defs from descriptor protos. *********************************/
1109 
1110 /* There is a question of how much validation to do here.  It will be difficult
1111  * to perfectly match the amount of validation performed by proto2.  But since
1112  * this code is used to directly build defs from Ruby (for example) we do need
1113  * to validate important constraints like uniqueness of names and numbers. */
1114 
/* CHK(x): return false from the enclosing function when |x| is false.
 * CHK_OOM(x): same, but first calls upb_status_setoom(ctx->status); a variable
 * named |ctx| must therefore be in scope wherever it is used.
 *
 * NOTE: both expand to a bare `if` statement rather than do { } while (0), so
 * they must not be used as the unbraced body of an if/else. */
#define CHK(x) if (!(x)) { return false; }
#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; }
1117 
/* Context passed to the create_*def() helpers while building the defs of one
 * file: where to allocate, where to register new symbols, and where to report
 * errors. */
typedef struct {
  const upb_symtab *symtab;       /* Existing symbol table; checked for duplicates and used for lookups. */
  upb_filedef *file;              /* File we are building. */
  upb_alloc *alloc;               /* Allocate defs here. */
  upb_alloc *tmp;                 /* Alloc for addtab and any other tmp data. */
  upb_strtable *addtab;           /* full_name -> packed def ptr for new defs */
  const upb_msglayout **layouts;  /* NULL if we should build layouts. */
  upb_status *status;             /* Record errors here. */
} symtab_addctx;
1127 
/* Duplicates the bytes of |view| via upb_strdup2(), allocating from
 * ctx->alloc. */
static char* strviewdup(const symtab_addctx *ctx, upb_strview view) {
  upb_alloc *a = ctx->alloc;
  return upb_strdup2(view.data, view.size, a);
}
1131 
/* Returns true when the |n| bytes starting at |a| are exactly the
 * NUL-terminated string |b| (same length, same contents). */
static bool streql2(const char *a, size_t n, const char *b) {
  if (strlen(b) != n) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
1135 
/* Returns true when the string view's contents equal the NUL-terminated
 * string |b|. */
static bool streql_view(upb_strview view, const char *b) {
  const char *bytes = view.data;
  const size_t size = view.size;
  return streql2(bytes, size, b);
}
1139 
/* Builds a fully-qualified name, allocated from ctx->alloc.
 *
 * With a non-NULL |prefix| the result is prefix + '.' + name; otherwise it is
 * a NUL-terminated copy of |name|.
 *
 * Returns NULL on allocation failure, in which case an out-of-memory error
 * has been recorded on ctx->status. */
static const char *makefullname(const symtab_addctx *ctx, const char *prefix,
                                upb_strview name) {
  char *ret;

  if (prefix) {
    /* ret = prefix + '.' + name; */
    size_t n = strlen(prefix);
    ret = upb_malloc(ctx->alloc, n + name.size + 2);
    if (ret) {
      memcpy(ret, prefix, n);
      ret[n] = '.';
      if (name.size) memcpy(&ret[n + 1], name.data, name.size);
      ret[n + 1 + name.size] = '\0';
    }
  } else {
    ret = strviewdup(ctx, name);
  }

  if (!ret) {
    upb_status_setoom(ctx->status);
    return NULL;
  }
  return ret;
}
1156 
/* Converts a field name to its JSON form: every underscore is dropped and the
 * character following it is upper-cased.
 *
 * At most |len| bytes are written to |buf|; when |len| is non-zero the output
 * is always NUL-terminated (truncated if necessary).  |buf| may be NULL when
 * |len| is 0.
 *
 * Returns the number of bytes needed for the full result including the
 * terminating NUL, so a caller can size a buffer with a first call using
 * len == 0.  For a NULL |name| an empty string is written (if there is room)
 * and 0 is returned. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

  /* Counts one output byte and stores it if it fits; the last slot of the
   * buffer is always reserved for the terminator. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) buf[dst - 1] = (char)(byte); \
    else if (dst == len) buf[dst - 1] = '\0'; \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is only defined for values representable as unsigned char
       * (or EOF), so convert before calling it. */
      WRITE(toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
1194 
/* Returns the JSON form of |name| (see getjsonname()) in a buffer allocated
 * from |alloc|, or NULL if the allocation fails. */
static char* makejsonname(const char* name, upb_alloc *alloc) {
  /* First pass only measures the required size (including the NUL). */
  size_t size = getjsonname(name, NULL, 0);
  char* json_name = upb_malloc(alloc, size);
  if (!json_name) return NULL;
  getjsonname(name, json_name, size);
  return json_name;
}
1201 
/* Registers |name| -> |v| in ctx->addtab.
 *
 * Fails with a "duplicate symbol" error if the name is already present in
 * either addtab or the existing symtab, and with an out-of-memory error if
 * the insertion fails. */
static bool symtab_add(const symtab_addctx *ctx, const char *name,
                       upb_value v) {
  upb_value existing;
  const bool taken =
      upb_strtable_lookup(ctx->addtab, name, &existing) ||
      upb_strtable_lookup(&ctx->symtab->syms, name, &existing);

  if (taken) {
    upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name);
    return false;
  }

  if (!upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)) {
    upb_status_setoom(ctx->status);
    return false;
  }
  return true;
}
1214 
1215 /* Given a symbol and the base symbol inside which it is defined, find the
1216  * symbol's definition in t. */
resolvename(const upb_strtable * t,const upb_fielddef * f,const char * base,upb_strview sym,upb_deftype_t type,upb_status * status,const void ** def)1217 static bool resolvename(const upb_strtable *t, const upb_fielddef *f,
1218                         const char *base, upb_strview sym,
1219                         upb_deftype_t type, upb_status *status,
1220                         const void **def) {
1221   if(sym.size == 0) return false;
1222   if(sym.data[0] == '.') {
1223     /* Symbols starting with '.' are absolute, so we do a single lookup.
1224      * Slice to omit the leading '.' */
1225     upb_value v;
1226     if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
1227       return false;
1228     }
1229 
1230     *def = unpack_def(v, type);
1231 
1232     if (!*def) {
1233       upb_status_seterrf(status,
1234                          "type mismatch when resolving field %s, name %s",
1235                          f->full_name, sym.data);
1236       return false;
1237     }
1238 
1239     return true;
1240   } else {
1241     /* Remove components from base until we find an entry or run out.
1242      * TODO: This branch is totally broken, but currently not used. */
1243     (void)base;
1244     UPB_ASSERT(false);
1245     return false;
1246   }
1247 }
1248 
/* Resolves |sym| to a def of the given |type|, looking first at the defs being
 * added (ctx->addtab) and then at the existing symtab.
 *
 * Returns the def, or NULL on failure.  On failure ctx->status holds an error:
 * either the one set by resolvename() or a generic "couldn't resolve name". */
const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f,
                           const char *base, upb_strview sym,
                           upb_deftype_t type) {
  const void *ret;
  if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) &&
      !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) {
    if (upb_ok(ctx->status)) {
      /* The view carries an explicit size and may not be NUL-terminated, so
       * print it with an explicit precision. */
      upb_status_seterrf(ctx->status, "couldn't resolve name '%.*s'",
                         (int)sym.size, sym.data);
    }
    return NULL;
  }
  return ret;
}
1262 
/* Initializes the next free oneof def of message |m| from |oneof_proto|:
 * sets its parent and full name, registers it in the add-table and in the
 * message's name table, and creates its (empty) field lookup tables.
 *
 * Returns false with an error on ctx->status on failure. */
static bool create_oneofdef(
    const symtab_addctx *ctx, upb_msgdef *m,
    const google_protobuf_OneofDescriptorProto *oneof_proto) {
  upb_oneofdef *o;
  upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
  upb_value v;

  o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  CHK_OOM(o->full_name);

  v = pack_def(o, UPB_DEFTYPE_ONEOF);
  /* symtab_add() records its own error (duplicate symbol or OOM); do not
   * replace it with an out-of-memory status. */
  CHK(symtab_add(ctx, o->full_name, v));
  CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));

  CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  return true;
}
1283 
/* Parses the textual default value |str| (|len| bytes, not necessarily
 * NUL-terminated) according to the type of |f| and stores the result in
 * f->defaultval.
 *
 * Returns false if the text is not a valid value for the field's type, if the
 * field is a message (which cannot have a default), or if allocating a
 * string/bytes default fails (an out-of-memory error is recorded on
 * ctx->status in that last case). */
static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len,
                          upb_fielddef *f) {
  char *end;
  char nullz[64];
  errno = 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      /* Standard C number parsing functions expect null-terminated strings. */
      if (len >= sizeof(nullz) - 1) {
        return false;
      }
      if (len) memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32: {
      long val = strtol(str, &end, 0);
      CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end);
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_ENUM: {
      const upb_enumdef *e = f->sub.enumdef;
      int32_t val;
      CHK(upb_enumdef_ntoi(e, str, len, &val));
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_INT64: {
      /* long may be only 32 bits wide, so parse with strtoll to cover the
       * full 64-bit range on every platform. */
      long long val = strtoll(str, &end, 0);
      CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end);
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(str, &end, 0);
      CHK(val <= UINT32_MAX && errno != ERANGE && !*end);
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_UINT64: {
      /* As above: unsigned long may be only 32 bits wide. */
      unsigned long long val = strtoull(str, &end, 0);
      CHK(val <= UINT64_MAX && errno != ERANGE && !*end);
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(str, &end);
      CHK(errno != ERANGE && !*end);
      f->defaultval.dbl = val;
      break;
    }
    case UPB_TYPE_FLOAT: {
      /* XXX: Need to write our own strtof, since it's not available in c89. */
      float val = strtod(str, &end);
      CHK(errno != ERANGE && !*end);
      f->defaultval.flt = val;
      break;
    }
    case UPB_TYPE_BOOL: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        return false;
      }
      break;
    }
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx->alloc, str, len);
      CHK_OOM(f->defaultval.str);
      break;
    case UPB_TYPE_BYTES:
      /* XXX: need to interpret the C-escaped value. */
      f->defaultval.str = newstr(ctx->alloc, str, len);
      CHK_OOM(f->defaultval.str);
      break;
    case UPB_TYPE_MESSAGE:
      /* Should not have a default value. */
      return false;
  }
  return true;
}
1379 
/* Stores the implicit default for |f|'s type in f->defaultval: zero for
 * numeric and enum fields, false for bools, and a newly allocated empty
 * string for string/bytes fields.  Message fields are left untouched. */
static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) {
  const upb_fieldtype_t kind = upb_fielddef_type(f);

  switch (kind) {
    case UPB_TYPE_MESSAGE:
      /* No default value for sub-messages. */
      break;
    case UPB_TYPE_BOOL:
      f->defaultval.boolean = false;
      break;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      f->defaultval.str = newstr(ctx->alloc, NULL, 0);
      break;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;
  }
}
1406 
/* Creates a field def from |field_proto|.
 *
 * When |m| is non-NULL the field is a direct member of that message: it takes
 * the next free slot in m->fields and is registered in the message's name and
 * number tables (under both its name and, if different, its JSON name).
 * When |m| is NULL the field is an extension: it takes the next free slot in
 * the file's extension array and is registered in the add-table under its
 * full name.
 *
 * Sub-definitions are not resolved here; the proto is stashed in
 * f->sub.unresolved for a later pass.
 *
 * Returns false with an error on ctx->status on failure. */
static bool create_fielddef(
    const symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
    const google_protobuf_FieldDescriptorProto *field_proto) {
  upb_alloc *alloc = ctx->alloc;
  upb_fielddef *f;
  const google_protobuf_FieldOptions *options;
  upb_strview name;
  const char *full_name;
  const char *json_name;
  const char *shortname;
  uint32_t field_number;

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    upb_status_seterrmsg(ctx->status, "field has no name");
    return false;
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  CHK(upb_isident(name, false, ctx->status));
  full_name = makefullname(ctx, prefix, name);
  CHK_OOM(full_name);
  shortname = shortdefname(full_name);

  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
  } else {
    json_name = makejsonname(shortname, ctx->alloc);
  }
  /* json_name is passed to strlen()/strcmp() below, so it must not be NULL. */
  CHK_OOM(json_name);

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
    upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number);
    return false;
  }

  if (m) {
    /* direct message field. */
    upb_value v, field_v, json_v;
    size_t json_size;

    f = (upb_fielddef*)&m->fields[m->field_count++];
    f->msgdef = m;
    f->is_extension_ = false;

    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname);
      return false;
    }

    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name);
      return false;
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field number (%u)",
                         field_number);
      return false;
    }

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    CHK_OOM(
        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));

    /* Only add a separate JSON-name entry when it differs from the name. */
    if (strcmp(shortname, json_name) != 0) {
      CHK_OOM(
          upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc));
    }

    if (ctx->layouts) {
      /* A pre-built layout was supplied: find this field's position in it. */
      const upb_msglayout_field *fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      int i;
      for (i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
    f->is_extension_ = true;
    /* symtab_add() records its own error (duplicate symbol or OOM); do not
     * replace it with an out-of-memory status. */
    CHK(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)));
  }

  f->full_name = full_name;
  f->json_name = json_name;
  f->file = ctx->file;
  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->oneof = NULL;
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)",
                       f->full_name);
    return false;
  }

  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    int oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_oneofdef *oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
      upb_status_seterrf(ctx->status,
                         "fields in oneof must have OPTIONAL label (%s)",
                         f->full_name);
      return false;
    }

    if (!m) {
      upb_status_seterrf(ctx->status,
                         "oneof_index provided for extension field (%s)",
                         f->full_name);
      return false;
    }

    /* oneof_index is signed; reject negative values as well as values past
     * the end of the oneof array. */
    if (oneof_index < 0 || oneof_index >= m->oneof_count) {
      upb_status_seterrf(ctx->status, "oneof_index out of range (%s)",
                         f->full_name);
      return false;
    }

    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
    f->oneof = oneof;

    CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
    CHK_OOM(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  }

  options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
    google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;

  if (options && google_protobuf_FieldOptions_has_packed(options)) {
    f->packed_ = google_protobuf_FieldOptions_packed(options);
  } else {
    /* Repeated fields default to packed for proto3 only. */
    f->packed_ = upb_fielddef_isprimitive(f) &&
        f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
  }

  if (options) {
    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
  } else {
    f->lazy_ = false;
  }

  return true;
}
1577 
create_enumdef(const symtab_addctx * ctx,const char * prefix,const google_protobuf_EnumDescriptorProto * enum_proto)1578 static bool create_enumdef(
1579     const symtab_addctx *ctx, const char *prefix,
1580     const google_protobuf_EnumDescriptorProto *enum_proto) {
1581   upb_enumdef *e;
1582   const google_protobuf_EnumValueDescriptorProto *const *values;
1583   upb_strview name;
1584   size_t i, n;
1585 
1586   name = google_protobuf_EnumDescriptorProto_name(enum_proto);
1587   CHK(upb_isident(name, false, ctx->status));
1588 
1589   e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
1590   e->full_name = makefullname(ctx, prefix, name);
1591   CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
1592 
1593   CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
1594   CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
1595 
1596   e->file = ctx->file;
1597   e->defaultval = 0;
1598 
1599   values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
1600 
1601   if (n == 0) {
1602     upb_status_seterrf(ctx->status,
1603                        "enums must contain at least one value (%s)",
1604                        e->full_name);
1605     return false;
1606   }
1607 
1608   for (i = 0; i < n; i++) {
1609     const google_protobuf_EnumValueDescriptorProto *value = values[i];
1610     upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
1611     char *name2 = strviewdup(ctx, name);
1612     int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
1613     upb_value v = upb_value_int32(num);
1614 
1615     if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
1616       upb_status_seterrf(ctx->status,
1617                          "for proto3, the first enum value must be zero (%s)",
1618                          e->full_name);
1619       return false;
1620     }
1621 
1622     if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
1623       upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2);
1624       return false;
1625     }
1626 
1627     CHK_OOM(name2)
1628     CHK_OOM(
1629         upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
1630 
1631     if (!upb_inttable_lookup(&e->iton, num, NULL)) {
1632       upb_value v = upb_value_cstr(name2);
1633       CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
1634     }
1635   }
1636 
1637   upb_inttable_compact2(&e->iton, ctx->alloc);
1638 
1639   return true;
1640 }
1641 
/* Creates a message def from |msg_proto| in the next free slot of the file's
 * message array, registers it in the add-table, then creates its oneofs and
 * fields followed by its nested enums and nested messages (recursively).
 *
 * If ctx->layouts is set, the next pre-built layout is consumed for this
 * message; otherwise an uninitialized layout is allocated to be populated
 * later.
 *
 * Returns false with an error on ctx->status on failure. */
static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
                          const google_protobuf_DescriptorProto *msg_proto) {
  upb_msgdef *m;
  const google_protobuf_MessageOptions *options;
  const google_protobuf_OneofDescriptorProto *const *oneofs;
  const google_protobuf_FieldDescriptorProto *const *fields;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_DescriptorProto *const *msgs;
  size_t i, n;
  upb_strview name;

  name = google_protobuf_DescriptorProto_name(msg_proto);
  CHK(upb_isident(name, false, ctx->status));

  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
  m->full_name = makefullname(ctx, prefix, name);
  CHK_OOM(m->full_name);
  /* symtab_add() records its own error (duplicate symbol or OOM); do not
   * replace it with an out-of-memory status. */
  CHK(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));

  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  m->file = ctx->file;
  m->map_entry = false;

  options = google_protobuf_DescriptorProto_options(msg_proto);

  if (options) {
    m->map_entry = google_protobuf_MessageOptions_map_entry(options);
  }

  if (ctx->layouts) {
    m->layout = *ctx->layouts;
    ctx->layouts++;
  } else {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
    CHK_OOM(m->layout);
  }

  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
  m->oneof_count = 0;
  m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
  /* A NULL array is only an error when there is something to store in it. */
  CHK_OOM(n == 0 || m->oneofs);
  for (i = 0; i < n; i++) {
    CHK(create_oneofdef(ctx, m, oneofs[i]));
  }

  fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
  m->field_count = 0;
  m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
  CHK_OOM(n == 0 || m->fields);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
  }

  CHK(assign_msg_indices(m, ctx->status));
  CHK(check_oneofs(m, ctx->status));
  assign_msg_wellknowntype(m);
  upb_inttable_compact2(&m->itof, ctx->alloc);

  /* This message is built.  Now build nested messages and enums. */

  enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, m->full_name, enums[i]));
  }

  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, m->full_name, msgs[i]));
  }

  return true;
}
1713 
/* Running totals of the defs declared by a file.  Filled in by
 * count_types_in_file()/count_types_in_msg() so that build_filedef() can size
 * its def arrays before any def is created. */
typedef struct {
  int msg_count;   /* Messages, including nested messages. */
  int enum_count;  /* Enums declared at file scope or inside a message. */
  int ext_count;   /* Extensions declared at file scope or inside a message. */
} decl_counts;
1719 
/* Adds msg_proto, and everything declared inside it, to the totals in counts:
 * one message for msg_proto itself, its enums and extensions, and (by
 * recursion) every nested message. */
static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
                               decl_counts *counts) {
  size_t nested_n, enum_n, ext_n, idx;
  const google_protobuf_DescriptorProto *const *nested =
      google_protobuf_DescriptorProto_nested_type(msg_proto, &nested_n);

  /* For enums and extensions only the element count is needed. */
  google_protobuf_DescriptorProto_enum_type(msg_proto, &enum_n);
  google_protobuf_DescriptorProto_extension(msg_proto, &ext_n);

  counts->msg_count += 1;
  counts->enum_count += enum_n;
  counts->ext_count += ext_n;

  for (idx = 0; idx < nested_n; idx++) {
    count_types_in_msg(nested[idx], counts);
  }
}
1738 
/* Adds every message (recursively), enum and extension declared by
 * file_proto to the totals in counts.  The caller is expected to have
 * initialized counts. */
static void count_types_in_file(
    const google_protobuf_FileDescriptorProto *file_proto,
    decl_counts *counts) {
  size_t msg_n, enum_n, ext_n, idx;
  const google_protobuf_DescriptorProto *const *top_msgs =
      google_protobuf_FileDescriptorProto_message_type(file_proto, &msg_n);

  /* File-scope enums and extensions: only the element count is needed. */
  google_protobuf_FileDescriptorProto_enum_type(file_proto, &enum_n);
  google_protobuf_FileDescriptorProto_extension(file_proto, &ext_n);

  counts->enum_count += enum_n;
  counts->ext_count += ext_n;

  for (idx = 0; idx < msg_n; idx++) {
    count_types_in_msg(top_msgs[idx], counts);
  }
}
1756 
/* Second-pass resolution for a field or extension.  Using the descriptor
 * stashed in f->sub.unresolved, this:
 *   - resolves the extendee into f->msgdef (extensions only),
 *   - resolves the type name into f->sub.msgdef / f->sub.enumdef for message
 *     and enum fields,
 *   - parses the explicit default value, or installs the implicit one.
 * Names are resolved relative to prefix.  Returns false on failure; every
 * failure detected directly in this function records a message in
 * ctx->status. */
static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix,
                             upb_fielddef *f) {
  /* Read the descriptor first: f->sub is a union, so storing the resolved
   * sub-def below overwrites this pointer. */
  const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
  upb_strview type_name;
  upb_strview defaultval;
  bool is_submsg;
  bool is_enum;

  if (f->is_extension_) {
    upb_strview extendee;

    if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
      upb_status_seterrf(ctx->status,
                         "extension for field '%s' had no extendee",
                         f->full_name);
      return false;
    }

    extendee = google_protobuf_FieldDescriptorProto_extendee(field_proto);
    f->msgdef = symtab_resolve(ctx, f, prefix, extendee, UPB_DEFTYPE_MSG);
    CHK(f->msgdef);
  }

  is_submsg = upb_fielddef_issubmsg(f);
  is_enum = (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM);

  /* Message and enum fields must say which type they refer to. */
  if (is_submsg || is_enum) {
    if (!google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
      upb_status_seterrf(ctx->status, "field '%s' is missing type name",
                         f->full_name);
      return false;
    }
  }

  type_name = google_protobuf_FieldDescriptorProto_type_name(field_proto);

  if (is_submsg) {
    f->sub.msgdef = symtab_resolve(ctx, f, prefix, type_name, UPB_DEFTYPE_MSG);
    CHK(f->sub.msgdef);
  } else if (is_enum) {
    f->sub.enumdef =
        symtab_resolve(ctx, f, prefix, type_name, UPB_DEFTYPE_ENUM);
    CHK(f->sub.enumdef);
  }

  /* Defaults can only be handled now: an enum default is written as a label,
   * which requires the enum to have been resolved first. */
  if (!google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
    set_default_default(ctx, f);
    return true;
  }

  defaultval = google_protobuf_FieldDescriptorProto_default_value(field_proto);

  if (f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status,
                       "proto3 fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (is_submsg) {
    upb_status_seterrf(ctx->status,
                       "message fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (!parse_default(ctx, defaultval.data, defaultval.size, f)) {
    upb_status_seterrf(ctx->status,
                       "couldn't parse default '" UPB_STRVIEW_FORMAT
                       "' for field (%s)",
                       UPB_STRVIEW_ARGS(defaultval), f->full_name);
    return false;
  }

  return true;
}
1825 
/* Populates file from file_proto.
 *
 * Steps, in order:
 *   1. Count the declared messages/enums/extensions and allocate the def
 *      arrays from ctx->alloc.
 *   2. Copy the file name, package, syntax and PHP options.
 *   3. Look up every dependency in ctx->symtab->files; each must already be
 *      loaded.
 *   4. Create the message, enum and file-scope extension defs.
 *   5. Resolve field references and defaults; build layouts when none were
 *      supplied in ctx->layouts.
 *
 * Returns false on failure.  Failures detected directly in this function
 * record a message in ctx->status (via upb_status_seterr* or CHK_OOM). */
static bool build_filedef(
    symtab_addctx *ctx, upb_filedef *file,
    const google_protobuf_FileDescriptorProto *file_proto) {
  upb_alloc *alloc = ctx->alloc;
  const google_protobuf_FileOptions *file_options_proto;
  const google_protobuf_DescriptorProto *const *msgs;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_FieldDescriptorProto *const *exts;
  const upb_strview *strs;
  size_t i, n;
  decl_counts counts = {0, 0, 0};

  count_types_in_file(file_proto, &counts);

  file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count);
  file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count);
  file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count);

  CHK_OOM(counts.msg_count == 0 || file->msgs);
  CHK_OOM(counts.enum_count == 0 || file->enums);
  CHK_OOM(counts.ext_count == 0 || file->exts);

  /* We increment these as defs are added. */
  file->msg_count = 0;
  file->enum_count = 0;
  file->ext_count = 0;

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    upb_status_seterrmsg(ctx->status, "File has no name");
    return false;
  }

  /* The name is later used as a C string key for the symtab's file table, so
   * a failed copy must be reported here rather than dereferenced later. */
  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  CHK_OOM(file->name);
  file->phpprefix = NULL;
  file->phpnamespace = NULL;

  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
    upb_strview package =
        google_protobuf_FileDescriptorProto_package(file_proto);
    CHK(upb_isident(package, true, ctx->status));
    file->package = strviewdup(ctx, package);
    CHK_OOM(file->package);
  } else {
    file->package = NULL;
  }

  /* A missing syntax declaration means proto2. */
  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_strview syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = UPB_SYNTAX_PROTO2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = UPB_SYNTAX_PROTO3;
    } else {
      upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
                         UPB_STRVIEW_ARGS(syntax));
      return false;
    }
  } else {
    file->syntax = UPB_SYNTAX_PROTO2;
  }

  /* Read options. */
  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  if (file_options_proto) {
    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
      file->phpprefix = strviewdup(
          ctx,
          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
      CHK_OOM(file->phpprefix);
    }
    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
      file->phpnamespace = strviewdup(
          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
      CHK_OOM(file->phpnamespace);
    }
  }

  /* Verify dependencies. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->deps = upb_malloc(alloc, sizeof(*file->deps) * n);
  CHK_OOM(n == 0 || file->deps);
  /* Record the length of deps[]; upb_filedef_depcount() and
   * upb_filedef_dep() rely on it. */
  file->dep_count = (int)n;

  for (i = 0; i < n; i++) {
    upb_strview dep_name = strs[i];
    upb_value v;
    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
                              dep_name.size, &v)) {
      upb_status_seterrf(ctx->status,
                         "Depends on file '" UPB_STRVIEW_FORMAT
                         "', but it has not been loaded",
                         UPB_STRVIEW_ARGS(dep_name));
      return false;
    }
    file->deps[i] = upb_value_getconstptr(v);
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, file->package, msgs[i]));
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, file->package, enums[i]));
  }

  /* Create extensions.  file->exts was already sized above from
   * counts.ext_count, which includes these n file-scope extensions, so no
   * second allocation is needed. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, file->package, NULL, exts[i]));
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->ext_count; i++) {
    CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]));
  }

  for (i = 0; i < (size_t)file->msg_count; i++) {
    const upb_msgdef *m = &file->msgs[i];
    int j;
    for (j = 0; j < m->field_count; j++) {
      CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]));
    }
  }

  /* Layouts are only computed here when the caller did not supply them. */
  if (!ctx->layouts) {
    for (i = 0; i < (size_t)file->msg_count; i++) {
      const upb_msgdef *m = &file->msgs[i];
      /* NOTE(review): any result of make_layout() is not inspected here;
       * confirm it cannot fail. */
      make_layout(ctx->symtab, m);
    }
  }

  return true;
}
1964 
/* Publishes a fully built file into the symtab: registers ctx->file in
 * s->files under its name, then copies every entry of ctx->addtab into
 * s->syms.  Memory for the new entries comes from s->arena.  Returns false
 * (through CHK_OOM) as soon as an insertion fails. */
static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx) {
  upb_alloc *alloc = upb_arena_alloc(s->arena);
  const upb_filedef *file = ctx->file;
  upb_strtable_iter it;

  CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name),
                               upb_value_constptr(file), alloc));

  upb_strtable_begin(&it, ctx->addtab);
  while (!upb_strtable_done(&it)) {
    upb_strview sym_name = upb_strtable_iter_key(&it);
    upb_value sym_def = upb_strtable_iter_value(&it);
    CHK_OOM(upb_strtable_insert3(&s->syms, sym_name.data, sym_name.size,
                                 sym_def, alloc));
    upb_strtable_next(&it);
  }

  return true;
}
1982 
1983 /* upb_filedef ****************************************************************/
1984 
/* Returns the file's name, as stored in the filedef. */
const char *upb_filedef_name(const upb_filedef *f) {
  return f->name;
}
1988 
/* Returns the file's package, or NULL when the file declared no package. */
const char *upb_filedef_package(const upb_filedef *f) {
  return f->package;
}
1992 
/* Returns the php_class_prefix file option, or NULL when it was not set. */
const char *upb_filedef_phpprefix(const upb_filedef *f) {
  return f->phpprefix;
}
1996 
/* Returns the php_namespace file option, or NULL when it was not set. */
const char *upb_filedef_phpnamespace(const upb_filedef *f) {
  return f->phpnamespace;
}
2000 
/* Returns the file's syntax (UPB_SYNTAX_PROTO2 or UPB_SYNTAX_PROTO3). */
upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
  return f->syntax;
}
2004 
/* Returns the number of message defs stored in the file's msgs array. */
int upb_filedef_msgcount(const upb_filedef *f) {
  return f->msg_count;
}
2008 
/* Returns the dependency count recorded in the filedef (f->dep_count). */
int upb_filedef_depcount(const upb_filedef *f) {
  return f->dep_count;
}
2012 
/* Returns the number of enum defs stored in the file's enums array. */
int upb_filedef_enumcount(const upb_filedef *f) {
  return f->enum_count;
}
2016 
upb_filedef_dep(const upb_filedef * f,int i)2017 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
2018   return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
2019 }
2020 
upb_filedef_msg(const upb_filedef * f,int i)2021 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
2022   return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
2023 }
2024 
upb_filedef_enum(const upb_filedef * f,int i)2025 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
2026   return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
2027 }
2028 
/* Destroys a symtab created by upb_symtab_new(): frees its arena first, then
 * the upb_symtab struct itself (which was allocated with upb_gmalloc). */
void upb_symtab_free(upb_symtab *s) {
  upb_arena_free(s->arena);
  upb_gfree(s);
}
2033 
upb_symtab_new(void)2034 upb_symtab *upb_symtab_new(void) {
2035   upb_symtab *s = upb_gmalloc(sizeof(*s));
2036   upb_alloc *alloc;
2037 
2038   if (!s) {
2039     return NULL;
2040   }
2041 
2042   s->arena = upb_arena_new();
2043   alloc = upb_arena_alloc(s->arena);
2044 
2045   if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
2046       !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
2047     upb_arena_free(s->arena);
2048     upb_gfree(s);
2049     s = NULL;
2050   }
2051   return s;
2052 }
2053 
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)2054 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
2055   upb_value v;
2056   return upb_strtable_lookup(&s->syms, sym, &v) ?
2057       unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
2058 }
2059 
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)2060 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
2061                                         size_t len) {
2062   upb_value v;
2063   return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
2064       unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
2065 }
2066 
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)2067 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
2068   upb_value v;
2069   return upb_strtable_lookup(&s->syms, sym, &v) ?
2070       unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
2071 }
2072 
upb_symtab_lookupfile(const upb_symtab * s,const char * name)2073 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
2074   upb_value v;
2075   return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
2076                                                   : NULL;
2077 }
2078 
upb_symtab_lookupfile2(const upb_symtab * s,const char * name,size_t len)2079 const upb_filedef *upb_symtab_lookupfile2(
2080     const upb_symtab *s, const char *name, size_t len) {
2081   upb_value v;
2082   return upb_strtable_lookup2(&s->files, name, len, &v) ?
2083       upb_value_getconstptr(v) : NULL;
2084 }
2085 
/* Returns the number of entries in the symtab's file table, narrowed to
 * int. */
int upb_symtab_filecount(const upb_symtab *s) {
  return (int)upb_strtable_count(&s->files);
}
2089 
_upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_msglayout ** layouts,upb_status * status)2090 static const upb_filedef *_upb_symtab_addfile(
2091     upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2092     const upb_msglayout **layouts, upb_status *status) {
2093   upb_arena *tmparena = upb_arena_new();
2094   upb_strtable addtab;
2095   upb_alloc *alloc = upb_arena_alloc(s->arena);
2096   upb_filedef *file = upb_malloc(alloc, sizeof(*file));
2097   bool ok;
2098   symtab_addctx ctx;
2099 
2100   ctx.file = file;
2101   ctx.symtab = s;
2102   ctx.alloc = alloc;
2103   ctx.tmp = upb_arena_alloc(tmparena);
2104   ctx.addtab = &addtab;
2105   ctx.layouts = layouts;
2106   ctx.status = status;
2107 
2108   ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
2109        build_filedef(&ctx, file, file_proto) && upb_symtab_addtotabs(s, &ctx);
2110 
2111   upb_arena_free(tmparena);
2112   return ok ? file : NULL;
2113 }
2114 
/* Adds the file described by file_proto to s.  This forwards to
 * _upb_symtab_addfile() with no precompiled layouts (NULL).  Returns the new
 * filedef, or NULL on failure. */
const upb_filedef *upb_symtab_addfile(
    upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
    upb_status *status) {
  return _upb_symtab_addfile(s, file_proto, NULL, status);
}
2120 
2121 /* Include here since we want most of this file to be stdio-free. */
2122 #include <stdio.h>
2123 
/* Loads the compiled-in descriptor described by init into s, first loading
 * (recursively) every entry of the NULL-terminated init->deps list.  A file
 * whose name is already present in s->files is treated as loaded.
 *
 * Returns true on success.  On failure, returns false after printing the
 * reason to stderr. */
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  upb_def_init **deps = init->deps;
  google_protobuf_FileDescriptorProto *file;
  upb_arena *arena = NULL;
  upb_status status;

  upb_status_clear(&status);

  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
    return true;
  }

  for (; *deps; deps++) {
    if (!_upb_symtab_loaddefinit(s, *deps)) {
      /* The recursive call already printed its own error; record which
       * dependency failed so that this level's message is not empty. */
      upb_status_seterrf(&status,
                         "Failed to load dependency '%s' of file '%s'.",
                         (*deps)->filename, init->filename);
      goto err;
    }
  }

  /* The parse arena is only needed from here on, so it is not held while
   * the dependencies are being loaded. */
  arena = upb_arena_new();
  if (!arena) {
    upb_status_seterrmsg(&status, "out of memory");
    goto err;
  }

  file = google_protobuf_FileDescriptorProto_parse(
      init->descriptor.data, init->descriptor.size, arena);

  if (!file) {
    upb_status_seterrf(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;

  upb_arena_free(arena);
  return true;

err:
  fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
          upb_status_errmsg(&status));
  if (arena) upb_arena_free(arena);
  return false;
}
2167 
2168 #undef CHK
2169 #undef CHK_OOM
2170