1
2 #include "upb/def.h"
3
4 #include <ctype.h>
5 #include <errno.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include "google/protobuf/descriptor.upb.h"
9
10 #include "upb/port_def.inc"
11
/* Length-prefixed string record.  newstr() allocates |len| extra bytes past
 * the struct so that the data plus its terminator fit starting at |str|. */
typedef struct {
  size_t len;   /* Number of data bytes, not counting the terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
16
newstr(upb_alloc * alloc,const char * data,size_t len)17 static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) {
18 str_t *ret = upb_malloc(alloc, sizeof(*ret) + len);
19 if (!ret) return NULL;
20 ret->len = len;
21 if (len) memcpy(ret->str, data, len);
22 ret->str[len] = '\0';
23 return ret;
24 }
25
26 struct upb_fielddef {
27 const upb_filedef *file;
28 const upb_msgdef *msgdef;
29 const char *full_name;
30 const char *json_name;
31 union {
32 int64_t sint;
33 uint64_t uint;
34 double dbl;
35 float flt;
36 bool boolean;
37 str_t *str;
38 } defaultval;
39 const upb_oneofdef *oneof;
40 union {
41 const upb_msgdef *msgdef;
42 const upb_enumdef *enumdef;
43 const google_protobuf_FieldDescriptorProto *unresolved;
44 } sub;
45 uint32_t number_;
46 uint16_t index_;
47 uint16_t layout_index;
48 uint32_t selector_base; /* Used to index into a upb::Handlers table. */
49 bool is_extension_;
50 bool lazy_;
51 bool packed_;
52 bool proto3_optional_;
53 upb_descriptortype_t type_;
54 upb_label_t label_;
55 };
56
57 struct upb_msgdef {
58 const upb_msglayout *layout;
59 const upb_filedef *file;
60 const char *full_name;
61 uint32_t selector_count;
62 uint32_t submsg_field_count;
63
64 /* Tables for looking up fields by number and name. */
65 upb_inttable itof;
66 upb_strtable ntof;
67
68 const upb_fielddef *fields;
69 const upb_oneofdef *oneofs;
70 int field_count;
71 int oneof_count;
72 int real_oneof_count;
73
74 /* Is this a map-entry message? */
75 bool map_entry;
76 upb_wellknowntype_t well_known_type;
77
78 /* TODO(haberman): proper extension ranges (there can be multiple). */
79 };
80
81 struct upb_enumdef {
82 const upb_filedef *file;
83 const char *full_name;
84 upb_strtable ntoi;
85 upb_inttable iton;
86 int32_t defaultval;
87 };
88
89 struct upb_oneofdef {
90 const upb_msgdef *parent;
91 const char *full_name;
92 uint32_t index;
93 upb_strtable ntof;
94 upb_inttable itof;
95 };
96
97 struct upb_filedef {
98 const char *name;
99 const char *package;
100 const char *phpprefix;
101 const char *phpnamespace;
102 upb_syntax_t syntax;
103
104 const upb_filedef **deps;
105 const upb_msgdef *msgs;
106 const upb_enumdef *enums;
107 const upb_fielddef *exts;
108
109 int dep_count;
110 int msg_count;
111 int enum_count;
112 int ext_count;
113 };
114
115 struct upb_symtab {
116 upb_arena *arena;
117 upb_strtable syms; /* full_name -> packed def ptr */
118 upb_strtable files; /* file_name -> upb_filedef* */
119 };
120
121 /* Inside a symtab we store tagged pointers to specific def types. */
/* Tag stored in the low two bits of a packed def pointer (see pack_def() and
 * unpack_def()).  Values 1 and 2 are reused on purpose: one pair of names is
 * only used in the symtab table, the other only in a message's table. */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
133
/* Extracts the pointer from a tagged value.  Returns NULL unless the low two
 * bits of the stored pointer equal |type|; otherwise returns the pointer with
 * those bits cleared. */
static const void *unpack_def(upb_value v, upb_deftype_t type) {
  uintptr_t tagged = (uintptr_t)upb_value_getconstptr(v);

  if ((tagged & 3) != type) {
    return NULL;
  }
  return (const void*)(tagged & ~3);
}
138
pack_def(const void * ptr,upb_deftype_t type)139 static upb_value pack_def(const void *ptr, upb_deftype_t type) {
140 uintptr_t num = (uintptr_t)ptr | type;
141 return upb_value_constptr((const void*)num);
142 }
143
144 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
/* Returns true when |c| lies in the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) {
    return false;
  }
  return !(c > high);
}
148
/* Returns true for 'A'-'Z', 'a'-'z' and the underscore. */
static bool upb_isletter(char c) {
  if (upb_isbetween(c, 'A', 'Z')) {
    return true;
  }
  if (upb_isbetween(c, 'a', 'z')) {
    return true;
  }
  return c == '_';
}
152
/* Returns true for anything upb_isletter() accepts, plus '0'-'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) {
    return true;
  }
  return upb_isbetween(c, '0', '9');
}
156
/* Validates that |name| is a well-formed identifier.
 *
 * Each '.'-separated component must start with a letter or '_' and continue
 * with letters, digits or '_'.  When |full| is false no '.' is allowed at all.
 * Empty names and empty components (leading, trailing or doubled '.') are
 * rejected.
 *
 * Returns true if the name is valid.  On failure returns false and records a
 * message in |s|.
 *
 * The name is printed with "%.*s" and an explicit length because |name| is a
 * string view: only name.size bytes at name.data are known to be readable, so
 * a plain "%s" could read past the end of the buffer. */
static bool upb_isident(upb_strview name, bool full, upb_status *s) {
  const char *str = name.data;
  size_t len = name.size;
  bool start = true;  /* True while positioned at the start of a component. */
  size_t i;

  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%.*s)", (int)len,
                           str);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s,
            "invalid name: path components must start with a letter (%.*s)",
            (int)len, str);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(
            s, "invalid name: non-alphanumeric character (%.*s)", (int)len,
            str);
        return false;
      }
    }
  }

  if (start) {
    /* Empty name, or the name ended right after a '.'.  Report it so that the
     * caller does not see a failure with no message attached. */
    upb_status_seterrf(s, "invalid name: empty part (%.*s)", (int)len, str);
    return false;
  }
  return true;
}
188
/* Returns the part of |fullname| after its last '.', or |fullname| itself when
 * it contains no '.'.  A NULL input yields NULL.  The result points into the
 * caller's string; nothing is allocated. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) {
    return NULL;
  }

  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
202
203 /* All submessage fields are lower than all other fields.
204 * Secondly, fields are increasing in order. */
/* Computes the sort key used by cmp_fields(): the field number, with bit 30
 * set for every field that is not a submessage.  Asserts that the field number
 * itself is below 1 << 30. */
uint32_t field_rank(const upb_fielddef *f) {
  const uint32_t non_submsg_bit = 1 << 30;
  uint32_t rank = upb_fielddef_number(f);

  UPB_ASSERT(rank < non_submsg_bit);
  return upb_fielddef_issubmsg(f) ? rank : (rank | non_submsg_bit);
}
213
cmp_fields(const void * p1,const void * p2)214 int cmp_fields(const void *p1, const void *p2) {
215 const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
216 const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
217 return field_rank(f1) - field_rank(f2);
218 }
219
220 /* A few implementation details of handlers. We put these here to avoid
221 * a def -> handlers dependency. */
222
223 #define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */
224
/* Returns 2 for repeated fields and 0 for all others. */
static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
228
/* Returns the number of selectors |f| occupies: one, plus extras depending on
 * whether the field is repeated, a string, or a lazy submessage. */
static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2; /* STARTSEQ/ENDSEQ */
  }
  if (upb_fielddef_isstring(f)) {
    count += 2; /* [STRING]/STARTSTR/ENDSTR */
  }
  /* A plain submessage adds nothing here: ENDSUBMSG needs no extra slot and
   * STARTSUBMSG is at table beginning.  Only lazy submessages add slots. */
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3; /* STARTSTR/ENDSTR/STRING (for lazy) */
  }
  return count;
}
243
/* Records an out-of-memory error message in |status|. */
static void upb_status_setoom(upb_status *status) {
  const char *message = "out of memory";

  upb_status_seterrmsg(status, message);
}
247
/* Assigns index_ and selector_base to every field of |m| and fills in
 * m->selector_count and m->submsg_field_count.
 *
 * Fields are sorted with cmp_fields() first.  upb internally relies on
 * UPB_TYPE_MESSAGE fields having the lowest indexes, but we do not publicly
 * guarantee this.
 *
 * Returns false and sets an out-of-memory status on |s| if the temporary
 * array cannot be allocated; returns true otherwise. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  upb_msg_field_iter iter;
  upb_fielddef **sorted;
  uint32_t next_selector;
  int count = upb_msgdef_numfields(m);
  int pos;

  if (count == 0) {
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    m->submsg_field_count = 0;
    return true;
  }

  sorted = upb_gmalloc(count * sizeof(*sorted));
  if (sorted == NULL) {
    upb_status_setoom(s);
    return false;
  }

  /* Collect all fields, counting submessage fields along the way. */
  m->submsg_field_count = 0;
  pos = 0;
  upb_msg_field_begin(&iter, m);
  while (!upb_msg_field_done(&iter)) {
    upb_fielddef *f = upb_msg_iter_field(&iter);
    UPB_ASSERT(f->msgdef == m);
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    sorted[pos] = f;
    upb_msg_field_next(&iter);
    pos++;
  }

  qsort(sorted, count, sizeof(*sorted), cmp_fields);

  /* Selectors start after the static ones and one slot per submessage. */
  next_selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (pos = 0; pos < count; pos++) {
    upb_fielddef *f = sorted[pos];
    f->index_ = pos;
    f->selector_base = next_selector + upb_handlers_selectorbaseoffset(f);
    next_selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = next_selector;

  upb_gfree(sorted);
  return true;
}
295
/* Numbers the oneofs of |m| in array order and computes m->real_oneof_count
 * as the position of the first synthetic oneof (or the total count if there
 * is none).
 *
 * Returns false with an error in |s| if a non-synthetic oneof appears after a
 * synthetic one; returns true otherwise. */
static bool check_oneofs(upb_msgdef *m, upb_status *s) {
  upb_oneofdef *oneofs = (upb_oneofdef*)m->oneofs;
  int first_synthetic = -1;
  int idx;

  for (idx = 0; idx < m->oneof_count; idx++) {
    upb_oneofdef *o = &oneofs[idx];
    o->index = idx;

    if (upb_oneofdef_issynthetic(o)) {
      if (first_synthetic == -1) {
        first_synthetic = idx;
      }
      continue;
    }

    if (first_synthetic != -1) {
      upb_status_seterrf(
          s, "Synthetic oneofs must be after all other oneofs: %s",
          upb_oneofdef_name(o));
      return false;
    }
  }

  m->real_oneof_count =
      (first_synthetic == -1) ? m->oneof_count : first_synthetic;
  return true;
}
326
/* Sets m->well_known_type from the message's full name.  Names that are not
 * in the table below, and a NULL name, map to UPB_WELLKNOWN_UNSPECIFIED. */
static void assign_msg_wellknowntype(upb_msgdef *m) {
  static const struct {
    const char *full_name;
    upb_wellknowntype_t type;
  } known_types[] = {
    {"google.protobuf.Any", UPB_WELLKNOWN_ANY},
    {"google.protobuf.FieldMask", UPB_WELLKNOWN_FIELDMASK},
    {"google.protobuf.Duration", UPB_WELLKNOWN_DURATION},
    {"google.protobuf.Timestamp", UPB_WELLKNOWN_TIMESTAMP},
    {"google.protobuf.DoubleValue", UPB_WELLKNOWN_DOUBLEVALUE},
    {"google.protobuf.FloatValue", UPB_WELLKNOWN_FLOATVALUE},
    {"google.protobuf.Int64Value", UPB_WELLKNOWN_INT64VALUE},
    {"google.protobuf.UInt64Value", UPB_WELLKNOWN_UINT64VALUE},
    {"google.protobuf.Int32Value", UPB_WELLKNOWN_INT32VALUE},
    {"google.protobuf.UInt32Value", UPB_WELLKNOWN_UINT32VALUE},
    {"google.protobuf.BoolValue", UPB_WELLKNOWN_BOOLVALUE},
    {"google.protobuf.StringValue", UPB_WELLKNOWN_STRINGVALUE},
    {"google.protobuf.BytesValue", UPB_WELLKNOWN_BYTESVALUE},
    {"google.protobuf.Value", UPB_WELLKNOWN_VALUE},
    {"google.protobuf.ListValue", UPB_WELLKNOWN_LISTVALUE},
    {"google.protobuf.Struct", UPB_WELLKNOWN_STRUCT}
  };
  const char *name = upb_msgdef_fullname(m);
  size_t i;

  m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
  if (name == NULL) {
    return;
  }

  for (i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++) {
    if (strcmp(name, known_types[i].full_name) == 0) {
      m->well_known_type = known_types[i].type;
      return;
    }
  }
}
369
370
371 /* upb_enumdef ****************************************************************/
372
upb_enumdef_fullname(const upb_enumdef * e)373 const char *upb_enumdef_fullname(const upb_enumdef *e) {
374 return e->full_name;
375 }
376
upb_enumdef_name(const upb_enumdef * e)377 const char *upb_enumdef_name(const upb_enumdef *e) {
378 return shortdefname(e->full_name);
379 }
380
upb_enumdef_file(const upb_enumdef * e)381 const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
382 return e->file;
383 }
384
upb_enumdef_default(const upb_enumdef * e)385 int32_t upb_enumdef_default(const upb_enumdef *e) {
386 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
387 return e->defaultval;
388 }
389
upb_enumdef_numvals(const upb_enumdef * e)390 int upb_enumdef_numvals(const upb_enumdef *e) {
391 return (int)upb_strtable_count(&e->ntoi);
392 }
393
/* Starts iteration over the values of |e|.  We iterate over the ntoi table,
 * to account for duplicate numbers. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *by_name = &e->ntoi;

  upb_strtable_begin(i, by_name);
}
398
/* Advances |iter| to the next entry. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once |iter| has moved past the last entry. */
bool upb_enum_done(upb_enum_iter *iter) {
  bool finished = upb_strtable_done(iter);
  return finished;
}
401
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)402 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
403 size_t len, int32_t *num) {
404 upb_value v;
405 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
406 return false;
407 }
408 if (num) *num = upb_value_getint32(v);
409 return true;
410 }
411
upb_enumdef_iton(const upb_enumdef * def,int32_t num)412 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
413 upb_value v;
414 return upb_inttable_lookup32(&def->iton, num, &v) ?
415 upb_value_getcstr(v) : NULL;
416 }
417
/* Returns the data pointer of the key at the iterator's current position. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *name = upb_strtable_iter_key(iter).data;
  return name;
}
421
/* Returns the int32 value at the iterator's current position. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  upb_value current = upb_strtable_iter_value(iter);
  return upb_value_getint32(current);
}
425
426
427 /* upb_fielddef ***************************************************************/
428
upb_fielddef_fullname(const upb_fielddef * f)429 const char *upb_fielddef_fullname(const upb_fielddef *f) {
430 return f->full_name;
431 }
432
upb_fielddef_type(const upb_fielddef * f)433 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
434 switch (f->type_) {
435 case UPB_DESCRIPTOR_TYPE_DOUBLE:
436 return UPB_TYPE_DOUBLE;
437 case UPB_DESCRIPTOR_TYPE_FLOAT:
438 return UPB_TYPE_FLOAT;
439 case UPB_DESCRIPTOR_TYPE_INT64:
440 case UPB_DESCRIPTOR_TYPE_SINT64:
441 case UPB_DESCRIPTOR_TYPE_SFIXED64:
442 return UPB_TYPE_INT64;
443 case UPB_DESCRIPTOR_TYPE_INT32:
444 case UPB_DESCRIPTOR_TYPE_SFIXED32:
445 case UPB_DESCRIPTOR_TYPE_SINT32:
446 return UPB_TYPE_INT32;
447 case UPB_DESCRIPTOR_TYPE_UINT64:
448 case UPB_DESCRIPTOR_TYPE_FIXED64:
449 return UPB_TYPE_UINT64;
450 case UPB_DESCRIPTOR_TYPE_UINT32:
451 case UPB_DESCRIPTOR_TYPE_FIXED32:
452 return UPB_TYPE_UINT32;
453 case UPB_DESCRIPTOR_TYPE_ENUM:
454 return UPB_TYPE_ENUM;
455 case UPB_DESCRIPTOR_TYPE_BOOL:
456 return UPB_TYPE_BOOL;
457 case UPB_DESCRIPTOR_TYPE_STRING:
458 return UPB_TYPE_STRING;
459 case UPB_DESCRIPTOR_TYPE_BYTES:
460 return UPB_TYPE_BYTES;
461 case UPB_DESCRIPTOR_TYPE_GROUP:
462 case UPB_DESCRIPTOR_TYPE_MESSAGE:
463 return UPB_TYPE_MESSAGE;
464 }
465 UPB_UNREACHABLE();
466 }
467
upb_fielddef_descriptortype(const upb_fielddef * f)468 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
469 return f->type_;
470 }
471
upb_fielddef_index(const upb_fielddef * f)472 uint32_t upb_fielddef_index(const upb_fielddef *f) {
473 return f->index_;
474 }
475
upb_fielddef_label(const upb_fielddef * f)476 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
477 return f->label_;
478 }
479
upb_fielddef_number(const upb_fielddef * f)480 uint32_t upb_fielddef_number(const upb_fielddef *f) {
481 return f->number_;
482 }
483
upb_fielddef_isextension(const upb_fielddef * f)484 bool upb_fielddef_isextension(const upb_fielddef *f) {
485 return f->is_extension_;
486 }
487
upb_fielddef_lazy(const upb_fielddef * f)488 bool upb_fielddef_lazy(const upb_fielddef *f) {
489 return f->lazy_;
490 }
491
upb_fielddef_packed(const upb_fielddef * f)492 bool upb_fielddef_packed(const upb_fielddef *f) {
493 return f->packed_;
494 }
495
upb_fielddef_name(const upb_fielddef * f)496 const char *upb_fielddef_name(const upb_fielddef *f) {
497 return shortdefname(f->full_name);
498 }
499
upb_fielddef_jsonname(const upb_fielddef * f)500 const char *upb_fielddef_jsonname(const upb_fielddef *f) {
501 return f->json_name;
502 }
503
upb_fielddef_selectorbase(const upb_fielddef * f)504 uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
505 return f->selector_base;
506 }
507
upb_fielddef_file(const upb_fielddef * f)508 const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
509 return f->file;
510 }
511
upb_fielddef_containingtype(const upb_fielddef * f)512 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
513 return f->msgdef;
514 }
515
upb_fielddef_containingoneof(const upb_fielddef * f)516 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
517 return f->oneof;
518 }
519
upb_fielddef_realcontainingoneof(const upb_fielddef * f)520 const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
521 if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
522 return f->oneof;
523 }
524
/* Placeholder called by the default-value accessors.  It performs no check;
 * both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(ctype);
  UPB_UNUSED(f);
}
529
upb_fielddef_defaultint64(const upb_fielddef * f)530 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
531 chkdefaulttype(f, UPB_TYPE_INT64);
532 return f->defaultval.sint;
533 }
534
upb_fielddef_defaultint32(const upb_fielddef * f)535 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
536 chkdefaulttype(f, UPB_TYPE_INT32);
537 return (int32_t)f->defaultval.sint;
538 }
539
upb_fielddef_defaultuint64(const upb_fielddef * f)540 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
541 chkdefaulttype(f, UPB_TYPE_UINT64);
542 return f->defaultval.uint;
543 }
544
upb_fielddef_defaultuint32(const upb_fielddef * f)545 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
546 chkdefaulttype(f, UPB_TYPE_UINT32);
547 return (uint32_t)f->defaultval.uint;
548 }
549
upb_fielddef_defaultbool(const upb_fielddef * f)550 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
551 chkdefaulttype(f, UPB_TYPE_BOOL);
552 return f->defaultval.boolean;
553 }
554
upb_fielddef_defaultfloat(const upb_fielddef * f)555 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
556 chkdefaulttype(f, UPB_TYPE_FLOAT);
557 return f->defaultval.flt;
558 }
559
upb_fielddef_defaultdouble(const upb_fielddef * f)560 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
561 chkdefaulttype(f, UPB_TYPE_DOUBLE);
562 return f->defaultval.dbl;
563 }
564
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)565 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
566 str_t *str = f->defaultval.str;
567 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
568 upb_fielddef_type(f) == UPB_TYPE_BYTES ||
569 upb_fielddef_type(f) == UPB_TYPE_ENUM);
570 if (str) {
571 if (len) *len = str->len;
572 return str->str;
573 } else {
574 if (len) *len = 0;
575 return NULL;
576 }
577 }
578
upb_fielddef_msgsubdef(const upb_fielddef * f)579 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
580 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE ? f->sub.msgdef : NULL;
581 }
582
upb_fielddef_enumsubdef(const upb_fielddef * f)583 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
584 return upb_fielddef_type(f) == UPB_TYPE_ENUM ? f->sub.enumdef : NULL;
585 }
586
upb_fielddef_layout(const upb_fielddef * f)587 const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
588 return &f->msgdef->layout->fields[f->layout_index];
589 }
590
upb_fielddef_issubmsg(const upb_fielddef * f)591 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
592 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
593 }
594
upb_fielddef_isstring(const upb_fielddef * f)595 bool upb_fielddef_isstring(const upb_fielddef *f) {
596 return upb_fielddef_type(f) == UPB_TYPE_STRING ||
597 upb_fielddef_type(f) == UPB_TYPE_BYTES;
598 }
599
upb_fielddef_isseq(const upb_fielddef * f)600 bool upb_fielddef_isseq(const upb_fielddef *f) {
601 return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
602 }
603
/* Returns true when the field is neither a string/bytes field nor a
 * submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
607
/* Returns true when the field is a repeated submessage whose message type is
 * flagged as a map entry. */
bool upb_fielddef_ismap(const upb_fielddef *f) {
  if (!upb_fielddef_isseq(f)) {
    return false;
  }
  if (!upb_fielddef_issubmsg(f)) {
    return false;
  }
  return upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
}
612
upb_fielddef_hassubdef(const upb_fielddef * f)613 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
614 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
615 }
616
upb_fielddef_haspresence(const upb_fielddef * f)617 bool upb_fielddef_haspresence(const upb_fielddef *f) {
618 if (upb_fielddef_isseq(f)) return false;
619 return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
620 f->file->syntax == UPB_SYNTAX_PROTO2;
621 }
622
/* Returns true when |x| lies in the inclusive range [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) {
    return false;
  }
  return !(x > high);
}
626
/* Returns true when |label| is in the range 1..3 inclusive. */
bool upb_fielddef_checklabel(int32_t label) {
  const int32_t lowest = 1;
  const int32_t highest = 3;
  return between(label, lowest, highest);
}
/* Returns true when |type| is in the range 1..11 inclusive. */
bool upb_fielddef_checktype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 11;
  return between(type, lowest, highest);
}
/* Returns true when |fmt| is in the range 1..3 inclusive. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  const int32_t lowest = 1;
  const int32_t highest = 3;
  return between(fmt, lowest, highest);
}
630
/* Returns true when |type| is in the range 1..18 inclusive. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
634
635 /* upb_msgdef *****************************************************************/
636
upb_msgdef_fullname(const upb_msgdef * m)637 const char *upb_msgdef_fullname(const upb_msgdef *m) {
638 return m->full_name;
639 }
640
upb_msgdef_file(const upb_msgdef * m)641 const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
642 return m->file;
643 }
644
upb_msgdef_name(const upb_msgdef * m)645 const char *upb_msgdef_name(const upb_msgdef *m) {
646 return shortdefname(m->full_name);
647 }
648
upb_msgdef_syntax(const upb_msgdef * m)649 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
650 return m->file->syntax;
651 }
652
upb_msgdef_selectorcount(const upb_msgdef * m)653 size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
654 return m->selector_count;
655 }
656
upb_msgdef_submsgfieldcount(const upb_msgdef * m)657 uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
658 return m->submsg_field_count;
659 }
660
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)661 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
662 upb_value val;
663 return upb_inttable_lookup32(&m->itof, i, &val) ?
664 upb_value_getconstptr(val) : NULL;
665 }
666
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)667 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
668 size_t len) {
669 upb_value val;
670
671 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
672 return NULL;
673 }
674
675 return unpack_def(val, UPB_DEFTYPE_FIELD);
676 }
677
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)678 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
679 size_t len) {
680 upb_value val;
681
682 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
683 return NULL;
684 }
685
686 return unpack_def(val, UPB_DEFTYPE_ONEOF);
687 }
688
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)689 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
690 const upb_fielddef **f, const upb_oneofdef **o) {
691 upb_value val;
692
693 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
694 return false;
695 }
696
697 *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
698 *f = unpack_def(val, UPB_DEFTYPE_FIELD);
699 return *o || *f; /* False if this was a JSON name. */
700 }
701
upb_msgdef_lookupjsonname(const upb_msgdef * m,const char * name,size_t len)702 const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
703 const char *name, size_t len) {
704 upb_value val;
705 const upb_fielddef* f;
706
707 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
708 return NULL;
709 }
710
711 f = unpack_def(val, UPB_DEFTYPE_FIELD);
712 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
713
714 return f;
715 }
716
upb_msgdef_numfields(const upb_msgdef * m)717 int upb_msgdef_numfields(const upb_msgdef *m) {
718 return m->field_count;
719 }
720
upb_msgdef_numoneofs(const upb_msgdef * m)721 int upb_msgdef_numoneofs(const upb_msgdef *m) {
722 return m->oneof_count;
723 }
724
upb_msgdef_numrealoneofs(const upb_msgdef * m)725 int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
726 return m->real_oneof_count;
727 }
728
upb_msgdef_layout(const upb_msgdef * m)729 const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
730 return m->layout;
731 }
732
_upb_msgdef_field(const upb_msgdef * m,int i)733 const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) {
734 if (i >= m->field_count) return NULL;
735 return &m->fields[i];
736 }
737
upb_msgdef_mapentry(const upb_msgdef * m)738 bool upb_msgdef_mapentry(const upb_msgdef *m) {
739 return m->map_entry;
740 }
741
upb_msgdef_wellknowntype(const upb_msgdef * m)742 upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
743 return m->well_known_type;
744 }
745
upb_msgdef_isnumberwrapper(const upb_msgdef * m)746 bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
747 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
748 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
749 type <= UPB_WELLKNOWN_UINT32VALUE;
750 }
751
upb_msgdef_iswrapper(const upb_msgdef * m)752 bool upb_msgdef_iswrapper(const upb_msgdef *m) {
753 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
754 return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
755 type <= UPB_WELLKNOWN_BOOLVALUE;
756 }
757
/* Starts iteration over the message's number-to-field table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  const upb_inttable *by_number = &m->itof;

  upb_inttable_begin(iter, by_number);
}
761
/* Advances |iter| to the next field. */
void upb_msg_field_next(upb_msg_field_iter *iter) {
  upb_inttable_next(iter);
}
763
/* Returns true once |iter| has moved past the last field. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
767
upb_msg_iter_field(const upb_msg_field_iter * iter)768 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
769 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
770 }
771
/* Forwards to upb_inttable_iter_setdone() on |iter|. */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
775
/* Returns the result of upb_inttable_iter_isequal() on the two iterators. */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
                                const upb_msg_field_iter * iter2) {
  bool same = upb_inttable_iter_isequal(iter1, iter2);
  return same;
}
780
/* Starts iteration over the oneofs of |m|.  The name table also holds
 * non-oneof entries, so the iterator is advanced until it rests on the first
 * oneof entry or reaches the end. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  for (upb_strtable_begin(iter, &m->ntof);
       !upb_strtable_done(iter) &&
       !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
       upb_strtable_next(iter)) {
    /* We need to skip past any initial fields. */
  }
}
789
/* Advances |iter| to the next oneof entry, or to the end of the table. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
  /* Always step at least once, then keep stepping over entries that are not
   * oneofs so that only oneofs are returned. */
  upb_strtable_next(iter);
  while (!upb_strtable_done(iter) &&
         !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
    upb_strtable_next(iter);
  }
}
797
/* Returns true once |iter| has reached the end of the table. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  bool finished = upb_strtable_done(iter);
  return finished;
}
801
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)802 const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
803 return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
804 }
805
/* Forwards to upb_strtable_iter_setdone() on |iter|. */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
}
809
/* Returns the result of upb_strtable_iter_isequal() on the two iterators. */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  bool same = upb_strtable_iter_isequal(iter1, iter2);
  return same;
}
814
815 /* upb_oneofdef ***************************************************************/
816
upb_oneofdef_name(const upb_oneofdef * o)817 const char *upb_oneofdef_name(const upb_oneofdef *o) {
818 return shortdefname(o->full_name);
819 }
820
upb_oneofdef_containingtype(const upb_oneofdef * o)821 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
822 return o->parent;
823 }
824
upb_oneofdef_numfields(const upb_oneofdef * o)825 int upb_oneofdef_numfields(const upb_oneofdef *o) {
826 return (int)upb_strtable_count(&o->ntof);
827 }
828
upb_oneofdef_index(const upb_oneofdef * o)829 uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
830 return o->index;
831 }
832
upb_oneofdef_issynthetic(const upb_oneofdef * o)833 bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
834 upb_inttable_iter iter;
835 const upb_fielddef *f;
836 upb_inttable_begin(&iter, &o->itof);
837 if (upb_oneofdef_numfields(o) != 1) return false;
838 f = upb_value_getptr(upb_inttable_iter_value(&iter));
839 UPB_ASSERT(f);
840 return f->proto3_optional_;
841 }
842
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)843 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
844 const char *name, size_t length) {
845 upb_value val;
846 return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
847 upb_value_getptr(val) : NULL;
848 }
849
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)850 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
851 upb_value val;
852 return upb_inttable_lookup32(&o->itof, num, &val) ?
853 upb_value_getptr(val) : NULL;
854 }
855
/* Starts iteration over the oneof's number-to-field table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  const upb_inttable *by_number = &o->itof;

  upb_inttable_begin(iter, by_number);
}
859
/* Advances |iter| to the next field of the oneof. */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
}
863
/* Returns true once |iter| has moved past the last field of the oneof. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
867
upb_oneof_iter_field(const upb_oneof_iter * iter)868 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
869 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
870 }
871
/* Forwards to upb_inttable_iter_setdone() on |iter|. */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
875
876 /* Dynamic Layout Generation. *************************************************/
877
/* Returns n / d rounded up to the next integer.  |d| must be non-zero.
 *
 * Written as quotient plus a remainder test rather than (n + d - 1) / d,
 * because the addition wraps around when |n| is close to SIZE_MAX and then
 * yields a result that is far too small. */
static size_t div_round_up(size_t n, size_t d) {
  return n / d + (n % d != 0);
}
881
/* Returns the in-memory size in bytes used for a single value of |type|. */
static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
  switch (type) {
    case UPB_TYPE_BOOL:
      return 1;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_INT32:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_ENUM:
      return 4;
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT64:
      return 8;
    case UPB_TYPE_STRING:
    case UPB_TYPE_BYTES:
      return sizeof(upb_strview);
    case UPB_TYPE_MESSAGE:
      return sizeof(void*);
  }
  UPB_UNREACHABLE();
}
903
upb_msg_fielddefsize(const upb_fielddef * f)904 static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
905 if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
906 upb_map_entry ent;
907 UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
908 return sizeof(ent.k);
909 } else if (upb_fielddef_isseq(f)) {
910 return sizeof(void*);
911 } else {
912 return upb_msgval_sizeof(upb_fielddef_type(f));
913 }
914 }
915
/* Reserves |size| bytes at the end of layout |l|, aligned to |size|, and
 * returns the offset of the reserved region. |l->size| is advanced past it. */
static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
  uint32_t offset;

  /* Pad the current end of the layout up to a multiple of |size|. */
  l->size = UPB_ALIGN_UP(l->size, size);
  offset = l->size;

  l->size += size;
  return offset;
}
924
field_number_cmp(const void * p1,const void * p2)925 static int field_number_cmp(const void *p1, const void *p2) {
926 const upb_msglayout_field *f1 = p1;
927 const upb_msglayout_field *f2 = p2;
928 return f1->number - f2->number;
929 }
930
/* Records, on every fielddef of |m|, the position of its entry in |fields|.
 * |fields| must hold one entry per field of |m|; each entry's number is looked
 * up in |m| and the resulting fielddef gets that entry's array index. */
static void assign_layout_indices(const upb_msgdef *m, upb_msglayout_field *fields) {
  const int count = upb_msgdef_numfields(m);
  int pos;

  for (pos = 0; pos < count; pos++) {
    const upb_fielddef *found = upb_msgdef_itof(m, fields[pos].number);
    upb_fielddef *mutable_f = (upb_fielddef*)found;
    UPB_ASSERT(mutable_f);
    mutable_f->layout_index = pos;
  }
}
940
941 /* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
942 * It computes a dynamic layout for all of the fields in |m|. */
make_layout(const upb_symtab * symtab,const upb_msgdef * m)943 static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
944 upb_msglayout *l = (upb_msglayout*)m->layout;
945 upb_msg_field_iter it;
946 upb_msg_oneof_iter oit;
947 size_t hasbit;
948 size_t submsg_count = m->submsg_field_count;
949 const upb_msglayout **submsgs;
950 upb_msglayout_field *fields;
951 upb_alloc *alloc = upb_arena_alloc(symtab->arena);
952
953 memset(l, 0, sizeof(*l));
954
955 fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields));
956 submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs));
957
958 if ((!fields && upb_msgdef_numfields(m)) ||
959 (!submsgs && submsg_count)) {
960 /* OOM. */
961 return false;
962 }
963
964 l->field_count = upb_msgdef_numfields(m);
965 l->fields = fields;
966 l->submsgs = submsgs;
967
968 if (upb_msgdef_mapentry(m)) {
969 /* TODO(haberman): refactor this method so this special case is more
970 * elegant. */
971 const upb_fielddef *key = upb_msgdef_itof(m, 1);
972 const upb_fielddef *val = upb_msgdef_itof(m, 2);
973 fields[0].number = 1;
974 fields[1].number = 2;
975 fields[0].label = UPB_LABEL_OPTIONAL;
976 fields[1].label = UPB_LABEL_OPTIONAL;
977 fields[0].presence = 0;
978 fields[1].presence = 0;
979 fields[0].descriptortype = upb_fielddef_descriptortype(key);
980 fields[1].descriptortype = upb_fielddef_descriptortype(val);
981 fields[0].offset = 0;
982 fields[1].offset = sizeof(upb_strview);
983 fields[1].submsg_index = 0;
984
985 if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
986 submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
987 }
988
989 l->field_count = 2;
990 l->size = 2 * sizeof(upb_strview);
991 l->size = UPB_ALIGN_UP(l->size, 8);
992 return true;
993 }
994
995 /* Allocate data offsets in three stages:
996 *
997 * 1. hasbits.
998 * 2. regular fields.
999 * 3. oneof fields.
1000 *
1001 * OPT: There is a lot of room for optimization here to minimize the size.
1002 */
1003
1004 /* Allocate hasbits and set basic field attributes. */
1005 submsg_count = 0;
1006 for (upb_msg_field_begin(&it, m), hasbit = 0;
1007 !upb_msg_field_done(&it);
1008 upb_msg_field_next(&it)) {
1009 upb_fielddef* f = upb_msg_iter_field(&it);
1010 upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
1011
1012 field->number = upb_fielddef_number(f);
1013 field->descriptortype = upb_fielddef_descriptortype(f);
1014 field->label = upb_fielddef_label(f);
1015
1016 if (field->descriptortype == UPB_DTYPE_STRING &&
1017 f->file->syntax == UPB_SYNTAX_PROTO2) {
1018 /* See TableDescriptorType() in upbc/generator.cc for details and
1019 * rationale. */
1020 field->descriptortype = UPB_DTYPE_BYTES;
1021 }
1022
1023 if (upb_fielddef_ismap(f)) {
1024 field->label = _UPB_LABEL_MAP;
1025 } else if (upb_fielddef_packed(f)) {
1026 field->label = _UPB_LABEL_PACKED;
1027 }
1028
1029 if (upb_fielddef_issubmsg(f)) {
1030 const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
1031 field->submsg_index = submsg_count++;
1032 submsgs[field->submsg_index] = subm->layout;
1033 }
1034
1035 if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
1036 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
1037 * table. This wastes one hasbit, but we don't worry about it for now. */
1038 field->presence = ++hasbit;
1039 } else {
1040 field->presence = 0;
1041 }
1042 }
1043
1044 /* Account for space used by hasbits. */
1045 l->size = div_round_up(hasbit, 8);
1046
1047 /* Allocate non-oneof fields. */
1048 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
1049 upb_msg_field_next(&it)) {
1050 const upb_fielddef* f = upb_msg_iter_field(&it);
1051 size_t field_size = upb_msg_fielddefsize(f);
1052 size_t index = upb_fielddef_index(f);
1053
1054 if (upb_fielddef_realcontainingoneof(f)) {
1055 /* Oneofs are handled separately below. */
1056 continue;
1057 }
1058
1059 fields[index].offset = upb_msglayout_place(l, field_size);
1060 }
1061
1062 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
1063 * and space for the actual data. */
1064 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
1065 upb_msg_oneof_next(&oit)) {
1066 const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
1067 upb_oneof_iter fit;
1068
1069 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
1070 size_t field_size = 0;
1071 uint32_t case_offset;
1072 uint32_t data_offset;
1073
1074 if (upb_oneofdef_issynthetic(o)) continue;
1075
1076 /* Calculate field size: the max of all field sizes. */
1077 for (upb_oneof_begin(&fit, o);
1078 !upb_oneof_done(&fit);
1079 upb_oneof_next(&fit)) {
1080 const upb_fielddef* f = upb_oneof_iter_field(&fit);
1081 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
1082 }
1083
1084 /* Align and allocate case offset. */
1085 case_offset = upb_msglayout_place(l, case_size);
1086 data_offset = upb_msglayout_place(l, field_size);
1087
1088 for (upb_oneof_begin(&fit, o);
1089 !upb_oneof_done(&fit);
1090 upb_oneof_next(&fit)) {
1091 const upb_fielddef* f = upb_oneof_iter_field(&fit);
1092 fields[upb_fielddef_index(f)].offset = data_offset;
1093 fields[upb_fielddef_index(f)].presence = ~case_offset;
1094 }
1095 }
1096
1097 /* Size of the entire structure should be a multiple of its greatest
1098 * alignment. TODO: track overall alignment for real? */
1099 l->size = UPB_ALIGN_UP(l->size, 8);
1100
1101 /* Sort fields by number. */
1102 qsort(fields, upb_msgdef_numfields(m), sizeof(*fields), field_number_cmp);
1103 assign_layout_indices(m, fields);
1104
1105 return true;
1106 }
1107
1108 /* Code to build defs from descriptor protos. *********************************/
1109
1110 /* There is a question of how much validation to do here. It will be difficult
1111 * to perfectly match the amount of validation performed by proto2. But since
1112 * this code is used to directly build defs from Ruby (for example) we do need
1113 * to validate important constraints like uniqueness of names and numbers. */
1114
/* Returns false from the enclosing function when |x| evaluates to false.
 * The expansion is a bare `if` statement (not wrapped in do/while), so avoid
 * using it as the unbraced body of another if/else. */
#define CHK(x) if (!(x)) { return false; }
/* Like CHK(), but calls upb_status_setoom(ctx->status) before returning.
 * A variable named |ctx| with a |status| member must be in scope. */
#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; }
1117
/* Context passed as |ctx| to the def-building functions below (create_*,
 * resolve_*, symtab_add, ...). The CHK_OOM() macro reads |status| from it. */
typedef struct {
  const upb_symtab *symtab;
  upb_filedef *file;              /* File we are building. */
  upb_alloc *alloc;               /* Allocate defs here. */
  upb_alloc *tmp;                 /* Alloc for addtab and any other tmp data. */
  upb_strtable *addtab;           /* full_name -> packed def ptr for new defs */
  const upb_msglayout **layouts;  /* NULL if we should build layouts. */
  upb_status *status;             /* Record errors here. */
} symtab_addctx;
1127
/* Duplicates the bytes of |view| via upb_strdup2(), allocating from
 * |ctx->alloc|. Returns whatever upb_strdup2() returns. */
static char* strviewdup(const symtab_addctx *ctx, upb_strview view) {
  upb_alloc *alloc = ctx->alloc;
  return upb_strdup2(view.data, view.size, alloc);
}
1131
/* Returns true iff the |n| bytes at |a| are exactly the NUL-terminated string
 * |b| (same length, same bytes). |a| need not be NUL-terminated. */
static bool streql2(const char *a, size_t n, const char *b) {
  if (n != strlen(b)) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
1135
/* Returns true iff the bytes of |view| equal the NUL-terminated string |b|. */
static bool streql_view(upb_strview view, const char *b) {
  const char *bytes = view.data;
  size_t count = view.size;
  return streql2(bytes, count, b);
}
1139
/* Builds the fully-qualified name for |name| declared inside |prefix|.
 *
 * With a non-NULL |prefix| the result is "<prefix>.<name>"; with a NULL
 * |prefix| it is a copy of |name|. The string is allocated from |ctx->alloc|.
 *
 * Returns NULL if the string could not be allocated; in that case an
 * out-of-memory error is recorded on |ctx->status|. */
static const char *makefullname(const symtab_addctx *ctx, const char *prefix,
                                upb_strview name) {
  char *ret;

  if (prefix) {
    /* ret = prefix + '.' + name; */
    size_t n = strlen(prefix);
    ret = upb_malloc(ctx->alloc, n + name.size + 2);
    if (ret) {
      memcpy(ret, prefix, n);
      ret[n] = '.';
      /* Skip the copy for an empty name so memcpy never sees a NULL source. */
      if (name.size) memcpy(&ret[n + 1], name.data, name.size);
      ret[n + 1 + name.size] = '\0';
    }
  } else {
    ret = strviewdup(ctx, name);
  }

  if (!ret) {
    upb_status_setoom(ctx->status);
    return NULL;
  }
  return ret;
}
1156
/* Converts the field name |name| to its JSON form and writes it to |buf|.
 *
 * The transformation removes every '_' and upper-cases the character that
 * follows one (a trailing '_' is simply dropped).
 *
 * At most |len| bytes are written to |buf|; if |len| > 0 the output is always
 * NUL-terminated, truncating the name if necessary. |buf| may be NULL when
 * |len| is 0.
 *
 * Returns the number of bytes required to hold the full result including the
 * terminating NUL, so a caller can size a buffer by first calling with
 * |len| == 0. If |name| is NULL, writes an empty string (when |len| > 1; with
 * |len| <= 1 nothing is written) and returns 0. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

  /* Appends one byte, silently dropping it once the buffer is full; the last
   * byte of the buffer is always reserved for the terminating NUL. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) buf[dst - 1] = (byte); \
    else if (dst == len) buf[dst - 1] = '\0'; \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char (or EOF);
       * passing a negative plain char is undefined behavior. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
1194
/* Returns a newly allocated JSON-style version of |name| (see getjsonname()),
 * allocated from |alloc|.
 *
 * Returns NULL if the allocation fails, instead of letting getjsonname()
 * write through a NULL buffer. */
static char* makejsonname(const char* name, upb_alloc *alloc) {
  /* First pass with an empty buffer only measures the required size. */
  size_t size = getjsonname(name, NULL, 0);
  char* json_name = upb_malloc(alloc, size);
  if (!json_name) return NULL;
  getjsonname(name, json_name, size);
  return json_name;
}
1201
/* Registers |name| -> |v| in the table of symbols being added (ctx->addtab).
 *
 * Fails with a "duplicate symbol" error if |name| is already present either in
 * ctx->addtab or in the symtab's existing symbols, and with an out-of-memory
 * error if the insertion fails. In both cases the error is recorded on
 * ctx->status and false is returned. */
static bool symtab_add(const symtab_addctx *ctx, const char *name,
                       upb_value v) {
  upb_value unused;
  bool taken = upb_strtable_lookup(ctx->addtab, name, &unused);

  if (!taken) {
    taken = upb_strtable_lookup(&ctx->symtab->syms, name, &unused);
  }

  if (taken) {
    upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name);
    return false;
  }

  if (!upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp)) {
    upb_status_setoom(ctx->status);
    return false;
  }

  return true;
}
1214
1215 /* Given a symbol and the base symbol inside which it is defined, find the
1216 * symbol's definition in t. */
resolvename(const upb_strtable * t,const upb_fielddef * f,const char * base,upb_strview sym,upb_deftype_t type,upb_status * status,const void ** def)1217 static bool resolvename(const upb_strtable *t, const upb_fielddef *f,
1218 const char *base, upb_strview sym,
1219 upb_deftype_t type, upb_status *status,
1220 const void **def) {
1221 if(sym.size == 0) return false;
1222 if(sym.data[0] == '.') {
1223 /* Symbols starting with '.' are absolute, so we do a single lookup.
1224 * Slice to omit the leading '.' */
1225 upb_value v;
1226 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
1227 return false;
1228 }
1229
1230 *def = unpack_def(v, type);
1231
1232 if (!*def) {
1233 upb_status_seterrf(status,
1234 "type mismatch when resolving field %s, name %s",
1235 f->full_name, sym.data);
1236 return false;
1237 }
1238
1239 return true;
1240 } else {
1241 /* Remove components from base until we find an entry or run out.
1242 * TODO: This branch is totally broken, but currently not used. */
1243 (void)base;
1244 UPB_ASSERT(false);
1245 return false;
1246 }
1247 }
1248
/* Resolves |sym| to a definition of kind |type|, searching first the defs
 * being added (ctx->addtab) and then the symtab's existing symbols.
 *
 * Returns the definition, or NULL on failure. On failure an error is left on
 * ctx->status: either the one recorded by resolvename(), or a generic
 * "couldn't resolve name" message if the status was still ok. */
const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f,
                           const char *base, upb_strview sym,
                           upb_deftype_t type) {
  const void *ret;
  if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) &&
      !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) {
    if (upb_ok(ctx->status)) {
      /* |sym| is not NUL-terminated; print it with an explicit length. */
      upb_status_seterrf(ctx->status,
                         "couldn't resolve name '" UPB_STRVIEW_FORMAT "'",
                         UPB_STRVIEW_ARGS(sym));
    }
    return NULL;
  }
  return ret;
}
1262
/* Initializes the next free slot of |m->oneofs| from |oneof_proto|.
 *
 * The oneof is registered under its full name via symtab_add() and under its
 * short name in |m->ntof|, and its own (initially empty) lookup tables are
 * created. Returns false with an error on ctx->status if the name is a
 * duplicate or an allocation fails. */
static bool create_oneofdef(
    const symtab_addctx *ctx, upb_msgdef *m,
    const google_protobuf_OneofDescriptorProto *oneof_proto) {
  upb_oneofdef *o;
  upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
  upb_value v;

  o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
  o->parent = m;
  o->full_name = makefullname(ctx, m->full_name, name);
  CHK_OOM(o->full_name);

  v = pack_def(o, UPB_DEFTYPE_ONEOF);
  /* symtab_add() records its own error (duplicate symbol or OOM); plain CHK
   * keeps that message instead of overwriting it with an OOM error. */
  CHK(symtab_add(ctx, o->full_name, v));
  CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));

  CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  return true;
}
1283
/* Parses the textual default value |str| (|len| bytes, not necessarily
 * NUL-terminated) according to the type of |f| and stores it in
 * |f->defaultval|.
 *
 * Numeric values must consist of a complete, non-empty, in-range number.
 * Enum defaults are looked up by label in |f->sub.enumdef|, which must already
 * be resolved. String and bytes defaults are copied verbatim.
 *
 * Returns false if the value cannot be parsed, if |f| is a message field, or
 * if a string copy cannot be allocated (in which case an out-of-memory error
 * is also recorded on ctx->status). */
static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len,
                          upb_fielddef *f) {
  char *end;
  char nullz[64];
  errno = 0;

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      /* Standard C number parsing functions expect null-terminated strings. */
      if (len >= sizeof(nullz) - 1) {
        return false;
      }
      memcpy(nullz, str, len);
      nullz[len] = '\0';
      str = nullz;
      break;
    default:
      break;
  }

  /* For every numeric case below, `end != str` rejects input with no digits
   * (e.g. an empty string) and `!*end` rejects trailing garbage. */
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_INT32: {
      long val = strtol(str, &end, 0);
      CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE &&
          end != str && !*end);
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_ENUM: {
      const upb_enumdef *e = f->sub.enumdef;
      int32_t val;
      CHK(upb_enumdef_ntoi(e, str, len, &val));
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_INT64: {
      /* strtoll, not strtol: long may be only 32 bits wide. */
      long long val = strtoll(str, &end, 0);
      CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE &&
          end != str && !*end);
      f->defaultval.sint = val;
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val = strtoul(str, &end, 0);
      CHK(val <= UINT32_MAX && errno != ERANGE && end != str && !*end);
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_UINT64: {
      /* strtoull, not strtoul: unsigned long may be only 32 bits wide. */
      unsigned long long val = strtoull(str, &end, 0);
      CHK(val <= UINT64_MAX && errno != ERANGE && end != str && !*end);
      f->defaultval.uint = val;
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val = strtod(str, &end);
      CHK(errno != ERANGE && end != str && !*end);
      f->defaultval.dbl = val;
      break;
    }
    case UPB_TYPE_FLOAT: {
      /* XXX: Need to write our own strtof, since it's not available in c89. */
      float val = strtod(str, &end);
      CHK(errno != ERANGE && end != str && !*end);
      f->defaultval.flt = val;
      break;
    }
    case UPB_TYPE_BOOL: {
      if (streql2(str, len, "false")) {
        f->defaultval.boolean = false;
      } else if (streql2(str, len, "true")) {
        f->defaultval.boolean = true;
      } else {
        return false;
      }
      break;
    }
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx->alloc, str, len);
      CHK_OOM(f->defaultval.str);
      break;
    case UPB_TYPE_BYTES:
      /* XXX: need to interpret the C-escaped value. */
      f->defaultval.str = newstr(ctx->alloc, str, len);
      CHK_OOM(f->defaultval.str);
      break;
    case UPB_TYPE_MESSAGE:
      /* Should not have a default value. */
      return false;
  }
  return true;
}
1379
/* Stores the implicit default for |f|'s type in |f->defaultval|: zero for
 * numeric and enum fields, false for bools, and a freshly allocated empty
 * string (from ctx->alloc) for string and bytes fields. Message fields are
 * left untouched. */
static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      f->defaultval.boolean = false;
      break;
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
      f->defaultval.sint = 0;
      break;
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;
    case UPB_TYPE_FLOAT:
    case UPB_TYPE_DOUBLE:
      /* Both floating-point types are zeroed through the double member. */
      f->defaultval.dbl = 0;
      break;
    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      f->defaultval.str = newstr(ctx->alloc, NULL, 0);
      break;
    case UPB_TYPE_MESSAGE:
      break;
  }
}
1406
/* Builds a fielddef from |field_proto|.
 *
 * If |m| is non-NULL the field is a regular field of message |m|: it takes the
 * next free slot of |m->fields| and is entered into the message's name and
 * number tables (under both its short name and its JSON name). If |m| is NULL
 * the field is an extension: it takes the next free slot of ctx->file->exts
 * and is registered by full name via symtab_add().
 *
 * The field's sub-def (message/enum type, extendee) is NOT resolved here; the
 * proto is stashed in |f->sub.unresolved| for a later resolution pass.
 *
 * Returns false with an error on ctx->status for a missing/invalid name, an
 * invalid or duplicate field number, a duplicate name, an invalid oneof
 * reference, a required proto3 field, or an allocation failure. */
static bool create_fielddef(
    const symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
    const google_protobuf_FieldDescriptorProto *field_proto) {
  upb_alloc *alloc = ctx->alloc;
  upb_fielddef *f;
  const google_protobuf_FieldOptions *options;
  upb_strview name;
  const char *full_name;
  const char *json_name;
  const char *shortname;
  uint32_t field_number;

  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
    upb_status_seterrmsg(ctx->status, "field has no name");
    return false;
  }

  name = google_protobuf_FieldDescriptorProto_name(field_proto);
  CHK(upb_isident(name, false, ctx->status));
  full_name = makefullname(ctx, prefix, name);
  /* Both names are passed to string functions below; bail out on OOM. */
  CHK_OOM(full_name);
  shortname = shortdefname(full_name);

  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
    json_name = strviewdup(
        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
  } else {
    json_name = makejsonname(shortname, ctx->alloc);
  }
  CHK_OOM(json_name);

  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);

  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
    upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number);
    return false;
  }

  if (m) {
    /* direct message field. */
    upb_value v, field_v, json_v;
    size_t json_size;

    f = (upb_fielddef*)&m->fields[m->field_count++];
    f->msgdef = m;
    f->is_extension_ = false;

    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname);
      return false;
    }

    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name);
      return false;
    }

    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
      upb_status_seterrf(ctx->status, "duplicate field number (%u)",
                         field_number);
      return false;
    }

    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
    v = upb_value_constptr(f);
    json_size = strlen(json_name);

    CHK_OOM(
        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));

    /* The JSON name only needs its own entry when it differs from the short
     * name, which was inserted just above. */
    if (strcmp(shortname, json_name) != 0) {
      CHK_OOM(
          upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc));
    }

    if (ctx->layouts) {
      /* A prebuilt layout was supplied: find this field's entry in it. */
      const upb_msglayout_field *fields = m->layout->fields;
      int count = m->layout->field_count;
      bool found = false;
      int i;
      for (i = 0; i < count; i++) {
        if (fields[i].number == field_number) {
          f->layout_index = i;
          found = true;
          break;
        }
      }
      UPB_ASSERT(found);
    }
  } else {
    /* extension field. */
    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
    f->is_extension_ = true;
    /* symtab_add() records its own error; don't overwrite it with OOM. */
    CHK(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)));
  }

  f->full_name = full_name;
  f->json_name = json_name;
  f->file = ctx->file;
  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
  f->number_ = field_number;
  f->oneof = NULL;
  f->proto3_optional_ =
      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);

  /* We can't resolve the subdef or (in the case of extensions) the containing
   * message yet, because it may not have been defined yet.  We stash a pointer
   * to the field_proto until later when we can properly resolve it. */
  f->sub.unresolved = field_proto;

  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)",
                       f->full_name);
    return false;
  }

  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
    int oneof_index =
        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
    upb_oneofdef *oneof;
    upb_value v = upb_value_constptr(f);

    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
      upb_status_seterrf(ctx->status,
                         "fields in oneof must have OPTIONAL label (%s)",
                         f->full_name);
      return false;
    }

    if (!m) {
      upb_status_seterrf(ctx->status,
                         "oneof_index provided for extension field (%s)",
                         f->full_name);
      return false;
    }

    /* The index is signed, so reject negative values as well as values past
     * the end; either would index outside m->oneofs. */
    if (oneof_index < 0 || oneof_index >= m->oneof_count) {
      upb_status_seterrf(ctx->status, "oneof_index out of range (%s)",
                         f->full_name);
      return false;
    }

    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
    f->oneof = oneof;

    CHK_OOM(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
    CHK_OOM(
        upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
  } else {
    f->oneof = NULL;
  }

  options = google_protobuf_FieldDescriptorProto_has_options(field_proto) ?
    google_protobuf_FieldDescriptorProto_options(field_proto) : NULL;

  if (options && google_protobuf_FieldOptions_has_packed(options)) {
    f->packed_ = google_protobuf_FieldOptions_packed(options);
  } else {
    /* Repeated fields default to packed for proto3 only. */
    f->packed_ = upb_fielddef_isprimitive(f) &&
        f->label_ == UPB_LABEL_REPEATED && f->file->syntax == UPB_SYNTAX_PROTO3;
  }

  if (options) {
    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
  } else {
    f->lazy_ = false;
  }

  return true;
}
1577
create_enumdef(const symtab_addctx * ctx,const char * prefix,const google_protobuf_EnumDescriptorProto * enum_proto)1578 static bool create_enumdef(
1579 const symtab_addctx *ctx, const char *prefix,
1580 const google_protobuf_EnumDescriptorProto *enum_proto) {
1581 upb_enumdef *e;
1582 const google_protobuf_EnumValueDescriptorProto *const *values;
1583 upb_strview name;
1584 size_t i, n;
1585
1586 name = google_protobuf_EnumDescriptorProto_name(enum_proto);
1587 CHK(upb_isident(name, false, ctx->status));
1588
1589 e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
1590 e->full_name = makefullname(ctx, prefix, name);
1591 CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
1592
1593 CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
1594 CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
1595
1596 e->file = ctx->file;
1597 e->defaultval = 0;
1598
1599 values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
1600
1601 if (n == 0) {
1602 upb_status_seterrf(ctx->status,
1603 "enums must contain at least one value (%s)",
1604 e->full_name);
1605 return false;
1606 }
1607
1608 for (i = 0; i < n; i++) {
1609 const google_protobuf_EnumValueDescriptorProto *value = values[i];
1610 upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
1611 char *name2 = strviewdup(ctx, name);
1612 int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
1613 upb_value v = upb_value_int32(num);
1614
1615 if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
1616 upb_status_seterrf(ctx->status,
1617 "for proto3, the first enum value must be zero (%s)",
1618 e->full_name);
1619 return false;
1620 }
1621
1622 if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
1623 upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2);
1624 return false;
1625 }
1626
1627 CHK_OOM(name2)
1628 CHK_OOM(
1629 upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
1630
1631 if (!upb_inttable_lookup(&e->iton, num, NULL)) {
1632 upb_value v = upb_value_cstr(name2);
1633 CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
1634 }
1635 }
1636
1637 upb_inttable_compact2(&e->iton, ctx->alloc);
1638
1639 return true;
1640 }
1641
/* Builds a msgdef from |msg_proto| in the next free slot of ctx->file->msgs,
 * then recursively builds its nested enums and messages.
 *
 * If ctx->layouts is non-NULL the next prebuilt layout is consumed from it;
 * otherwise an empty layout struct is allocated to be populated later.
 * Oneofs are created before fields so that fields can refer to them by index.
 *
 * Returns false with an error on ctx->status if any name is invalid or a
 * duplicate, any contained def fails to build, or an allocation fails. */
static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
                          const google_protobuf_DescriptorProto *msg_proto) {
  upb_msgdef *m;
  const google_protobuf_MessageOptions *options;
  const google_protobuf_OneofDescriptorProto *const *oneofs;
  const google_protobuf_FieldDescriptorProto *const *fields;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_DescriptorProto *const *msgs;
  size_t i, n;
  upb_strview name;

  name = google_protobuf_DescriptorProto_name(msg_proto);
  CHK(upb_isident(name, false, ctx->status));

  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
  m->full_name = makefullname(ctx, prefix, name);
  CHK_OOM(m->full_name);
  /* symtab_add() records its own error; don't overwrite it with OOM. */
  CHK(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));

  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));

  m->file = ctx->file;
  m->map_entry = false;

  options = google_protobuf_DescriptorProto_options(msg_proto);

  if (options) {
    m->map_entry = google_protobuf_MessageOptions_map_entry(options);
  }

  if (ctx->layouts) {
    m->layout = *ctx->layouts;
    ctx->layouts++;
  } else {
    /* Allocate now (to allow cross-linking), populate later. */
    m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
    CHK_OOM(m->layout);
  }

  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
  m->oneof_count = 0;
  m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
  CHK_OOM(n == 0 || m->oneofs);
  for (i = 0; i < n; i++) {
    CHK(create_oneofdef(ctx, m, oneofs[i]));
  }

  fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
  m->field_count = 0;
  m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
  CHK_OOM(n == 0 || m->fields);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
  }

  CHK(assign_msg_indices(m, ctx->status));
  CHK(check_oneofs(m, ctx->status));
  assign_msg_wellknowntype(m);
  upb_inttable_compact2(&m->itof, ctx->alloc);

  /* This message is built.  Now build nested messages and enums. */

  enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, m->full_name, enums[i]));
  }

  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, m->full_name, msgs[i]));
  }

  return true;
}
1713
/* Running totals filled in by count_types_in_file() / count_types_in_msg().
 * Field order matters: the struct is initialized positionally. */
typedef struct {
  int msg_count;   /* Messages, including nested ones. */
  int enum_count;  /* Enums declared at file scope or inside messages. */
  int ext_count;   /* Extensions declared at file scope or inside messages. */
} decl_counts;
1719
/* Adds |msg_proto| and everything declared inside it to |counts|: one message
 * for |msg_proto| itself, its enums and extensions, and (recursively) all of
 * its nested messages. */
static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
                               decl_counts *counts) {
  const google_protobuf_DescriptorProto *const *nested;
  size_t n_nested, n_enums, n_exts, idx;

  counts->msg_count++;

  nested = google_protobuf_DescriptorProto_nested_type(msg_proto, &n_nested);
  for (idx = 0; idx < n_nested; idx++) {
    count_types_in_msg(nested[idx], counts);
  }

  /* Only the element counts are needed; the returned arrays are ignored. */
  google_protobuf_DescriptorProto_enum_type(msg_proto, &n_enums);
  counts->enum_count += n_enums;

  google_protobuf_DescriptorProto_extension(msg_proto, &n_exts);
  counts->ext_count += n_exts;
}
1738
/* Adds to |counts| every message (including nested ones), enum and extension
 * declared in |file_proto|. |counts| is accumulated into, not reset. */
static void count_types_in_file(
    const google_protobuf_FileDescriptorProto *file_proto,
    decl_counts *counts) {
  const google_protobuf_DescriptorProto *const *top_level;
  size_t n_msgs, n_enums, n_exts, idx;

  top_level = google_protobuf_FileDescriptorProto_message_type(file_proto,
                                                               &n_msgs);
  for (idx = 0; idx < n_msgs; idx++) {
    count_types_in_msg(top_level[idx], counts);
  }

  /* Only the element counts are needed; the returned arrays are ignored. */
  google_protobuf_FileDescriptorProto_enum_type(file_proto, &n_enums);
  counts->enum_count += n_enums;

  google_protobuf_FileDescriptorProto_extension(file_proto, &n_exts);
  counts->ext_count += n_exts;
}
1756
/* Second-pass resolution for field |f|, using the descriptor proto stashed in
 * |f->sub.unresolved|:
 *   - for extensions, resolves the extendee into |f->msgdef|;
 *   - for message and enum fields, resolves the type name into |f->sub|;
 *   - sets the default value, either parsed from the proto or the implicit
 *     default for the type.
 *
 * Returns false with an error on ctx->status if a required name is missing or
 * cannot be resolved, or if an explicit default is not allowed or cannot be
 * parsed. */
static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix,
                             upb_fielddef *f) {
  /* Read the proto first: resolving the sub-def below overwrites the union
   * member it is stored in. */
  const google_protobuf_FieldDescriptorProto *proto = f->sub.unresolved;
  upb_strview sym;
  upb_strview dflt;

  if (f->is_extension_) {
    if (!google_protobuf_FieldDescriptorProto_has_extendee(proto)) {
      upb_status_seterrf(ctx->status,
                         "extension for field '%s' had no extendee",
                         f->full_name);
      return false;
    }

    sym = google_protobuf_FieldDescriptorProto_extendee(proto);
    f->msgdef = symtab_resolve(ctx, f, prefix, sym, UPB_DEFTYPE_MSG);
    if (!f->msgdef) return false;
  }

  if (upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    if (!google_protobuf_FieldDescriptorProto_has_type_name(proto)) {
      upb_status_seterrf(ctx->status, "field '%s' is missing type name",
                         f->full_name);
      return false;
    }
  }

  sym = google_protobuf_FieldDescriptorProto_type_name(proto);

  if (upb_fielddef_issubmsg(f)) {
    f->sub.msgdef = symtab_resolve(ctx, f, prefix, sym, UPB_DEFTYPE_MSG);
    if (!f->sub.msgdef) return false;
  } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
    f->sub.enumdef = symtab_resolve(ctx, f, prefix, sym, UPB_DEFTYPE_ENUM);
    if (!f->sub.enumdef) return false;
  }

  /* Have to delay resolving of the default value until now because of the enum
   * case, since enum defaults are specified with a label. */
  if (!google_protobuf_FieldDescriptorProto_has_default_value(proto)) {
    set_default_default(ctx, f);
    return true;
  }

  dflt = google_protobuf_FieldDescriptorProto_default_value(proto);

  if (f->file->syntax == UPB_SYNTAX_PROTO3) {
    upb_status_seterrf(ctx->status,
                       "proto3 fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (upb_fielddef_issubmsg(f)) {
    upb_status_seterrf(ctx->status,
                       "message fields cannot have explicit defaults (%s)",
                       f->full_name);
    return false;
  }

  if (!parse_default(ctx, dflt.data, dflt.size, f)) {
    upb_status_seterrf(ctx->status,
                       "couldn't parse default '" UPB_STRVIEW_FORMAT
                       "' for field (%s)",
                       UPB_STRVIEW_ARGS(dflt), f->full_name);
    return false;
  }

  return true;
}
1825
/* Populates |file| from the descriptor |file_proto|.
 *
 * Steps, in order:
 *   1. Allocate the msg/enum/ext arrays from ctx->alloc, sized by
 *      count_types_in_file().
 *   2. Copy name, package, syntax and the PHP file options.
 *   3. Look up every declared dependency in ctx->symtab->files; a dependency
 *      that has not been loaded yet is an error.
 *   4. Create message, enum and extension defs.
 *   5. Resolve field references, then build layouts with make_layout() when
 *      ctx->layouts is NULL.
 *
 * Returns true on success.  The explicit error paths record a message in
 * ctx->status and return false; the CHK/CHK_OOM macros (defined earlier in
 * this file) are expected to return false as well when their condition fails. */
static bool build_filedef(
    symtab_addctx *ctx, upb_filedef *file,
    const google_protobuf_FileDescriptorProto *file_proto) {
  upb_alloc *alloc = ctx->alloc;
  const google_protobuf_FileOptions *file_options_proto;
  const google_protobuf_DescriptorProto *const *msgs;
  const google_protobuf_EnumDescriptorProto *const *enums;
  const google_protobuf_FieldDescriptorProto *const *exts;
  const upb_strview *strs;
  size_t i, n;
  decl_counts counts = {0, 0, 0};

  count_types_in_file(file_proto, &counts);

  file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count);
  file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count);
  file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count);

  /* A NULL result is only an error when a non-empty array was requested. */
  CHK_OOM(counts.msg_count == 0 || file->msgs);
  CHK_OOM(counts.enum_count == 0 || file->enums);
  CHK_OOM(counts.ext_count == 0 || file->exts);

  /* We increment these as defs are added. */
  file->msg_count = 0;
  file->enum_count = 0;
  file->ext_count = 0;

  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
    upb_status_seterrmsg(ctx->status, "File has no name");
    return false;
  }

  file->name =
      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
  file->phpprefix = NULL;
  file->phpnamespace = NULL;

  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
    upb_strview package =
        google_protobuf_FileDescriptorProto_package(file_proto);
    CHK(upb_isident(package, true, ctx->status));
    file->package = strviewdup(ctx, package);
  } else {
    file->package = NULL;
  }

  /* A file without an explicit syntax declaration is treated as proto2. */
  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
    upb_strview syntax =
        google_protobuf_FileDescriptorProto_syntax(file_proto);

    if (streql_view(syntax, "proto2")) {
      file->syntax = UPB_SYNTAX_PROTO2;
    } else if (streql_view(syntax, "proto3")) {
      file->syntax = UPB_SYNTAX_PROTO3;
    } else {
      upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
                         UPB_STRVIEW_ARGS(syntax));
      return false;
    }
  } else {
    file->syntax = UPB_SYNTAX_PROTO2;
  }

  /* Read options. */
  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
  if (file_options_proto) {
    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
      file->phpprefix = strviewdup(
          ctx,
          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
    }
    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
      file->phpnamespace = strviewdup(
          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
    }
  }

  /* Verify dependencies. */
  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
  file->deps = upb_malloc(alloc, sizeof(*file->deps) * n);
  CHK_OOM(n == 0 || file->deps);

  /* Record the dependency count so that upb_filedef_depcount() and
   * upb_filedef_dep() do not read an uninitialized field: |file| comes from
   * upb_malloc(), which is not relied upon to zero memory. */
  file->dep_count = (int)n;

  for (i = 0; i < n; i++) {
    upb_strview dep_name = strs[i];
    upb_value v;
    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
                              dep_name.size, &v)) {
      upb_status_seterrf(ctx->status,
                         "Depends on file '" UPB_STRVIEW_FORMAT
                         "', but it has not been loaded",
                         UPB_STRVIEW_ARGS(dep_name));
      return false;
    }
    file->deps[i] = upb_value_getconstptr(v);
  }

  /* Create messages. */
  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_msgdef(ctx, file->package, msgs[i]));
  }

  /* Create enums. */
  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
  for (i = 0; i < n; i++) {
    CHK(create_enumdef(ctx, file->package, enums[i]));
  }

  /* Create extensions.
   * NOTE(review): file->exts was already allocated above with
   * counts.ext_count entries; this second allocation replaces that pointer
   * with an array sized for the top-level extensions only.  Confirm whether
   * the re-allocation is intended before removing or keeping it. */
  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
  file->exts = upb_malloc(alloc, sizeof(*file->exts) * n);
  CHK_OOM(n == 0 || file->exts);
  for (i = 0; i < n; i++) {
    CHK(create_fielddef(ctx, file->package, NULL, exts[i]));
  }

  /* Now that all names are in the table, build layouts and resolve refs. */
  for (i = 0; i < (size_t)file->ext_count; i++) {
    CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]));
  }

  for (i = 0; i < (size_t)file->msg_count; i++) {
    const upb_msgdef *m = &file->msgs[i];
    int j;
    for (j = 0; j < m->field_count; j++) {
      CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]));
    }
  }

  /* Layouts are only computed here when the caller did not supply them. */
  if (!ctx->layouts) {
    for (i = 0; i < (size_t)file->msg_count; i++) {
      const upb_msgdef *m = &file->msgs[i];
      make_layout(ctx->symtab, m);
    }
  }

  return true;
}
1964
/* Publishes a built file into the symbol table: ctx->file is inserted into
 * s->files keyed by its name, then every entry of ctx->addtab is inserted into
 * s->syms.  All insertions allocate from s->arena. */
static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx) {
  upb_alloc *arena_alloc = upb_arena_alloc(s->arena);
  const upb_filedef *added = ctx->file;
  upb_strtable_iter it;

  CHK_OOM(upb_strtable_insert3(&s->files, added->name, strlen(added->name),
                               upb_value_constptr(added), arena_alloc));

  upb_strtable_begin(&it, ctx->addtab);
  while (!upb_strtable_done(&it)) {
    upb_strview sym = upb_strtable_iter_key(&it);
    upb_value def = upb_strtable_iter_value(&it);
    CHK_OOM(
        upb_strtable_insert3(&s->syms, sym.data, sym.size, def, arena_alloc));
    upb_strtable_next(&it);
  }

  return true;
}
1982
1983 /* upb_filedef ****************************************************************/
1984
upb_filedef_name(const upb_filedef * f)1985 const char *upb_filedef_name(const upb_filedef *f) {
1986 return f->name;
1987 }
1988
upb_filedef_package(const upb_filedef * f)1989 const char *upb_filedef_package(const upb_filedef *f) {
1990 return f->package;
1991 }
1992
upb_filedef_phpprefix(const upb_filedef * f)1993 const char *upb_filedef_phpprefix(const upb_filedef *f) {
1994 return f->phpprefix;
1995 }
1996
upb_filedef_phpnamespace(const upb_filedef * f)1997 const char *upb_filedef_phpnamespace(const upb_filedef *f) {
1998 return f->phpnamespace;
1999 }
2000
upb_filedef_syntax(const upb_filedef * f)2001 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
2002 return f->syntax;
2003 }
2004
upb_filedef_msgcount(const upb_filedef * f)2005 int upb_filedef_msgcount(const upb_filedef *f) {
2006 return f->msg_count;
2007 }
2008
upb_filedef_depcount(const upb_filedef * f)2009 int upb_filedef_depcount(const upb_filedef *f) {
2010 return f->dep_count;
2011 }
2012
upb_filedef_enumcount(const upb_filedef * f)2013 int upb_filedef_enumcount(const upb_filedef *f) {
2014 return f->enum_count;
2015 }
2016
upb_filedef_dep(const upb_filedef * f,int i)2017 const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
2018 return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
2019 }
2020
upb_filedef_msg(const upb_filedef * f,int i)2021 const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
2022 return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
2023 }
2024
upb_filedef_enum(const upb_filedef * f,int i)2025 const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
2026 return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
2027 }
2028
/* Destroys a symtab: frees its arena first, then the symtab struct itself
 * (which was obtained from upb_gmalloc() in upb_symtab_new()). */
void upb_symtab_free(upb_symtab *s) {
  upb_arena *owned_arena = s->arena;

  upb_arena_free(owned_arena);
  upb_gfree(s);
}
2033
upb_symtab_new(void)2034 upb_symtab *upb_symtab_new(void) {
2035 upb_symtab *s = upb_gmalloc(sizeof(*s));
2036 upb_alloc *alloc;
2037
2038 if (!s) {
2039 return NULL;
2040 }
2041
2042 s->arena = upb_arena_new();
2043 alloc = upb_arena_alloc(s->arena);
2044
2045 if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
2046 !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
2047 upb_arena_free(s->arena);
2048 upb_gfree(s);
2049 s = NULL;
2050 }
2051 return s;
2052 }
2053
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)2054 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
2055 upb_value v;
2056 return upb_strtable_lookup(&s->syms, sym, &v) ?
2057 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
2058 }
2059
upb_symtab_lookupmsg2(const upb_symtab * s,const char * sym,size_t len)2060 const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
2061 size_t len) {
2062 upb_value v;
2063 return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
2064 unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
2065 }
2066
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)2067 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
2068 upb_value v;
2069 return upb_strtable_lookup(&s->syms, sym, &v) ?
2070 unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
2071 }
2072
upb_symtab_lookupfile(const upb_symtab * s,const char * name)2073 const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
2074 upb_value v;
2075 return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
2076 : NULL;
2077 }
2078
upb_symtab_lookupfile2(const upb_symtab * s,const char * name,size_t len)2079 const upb_filedef *upb_symtab_lookupfile2(
2080 const upb_symtab *s, const char *name, size_t len) {
2081 upb_value v;
2082 return upb_strtable_lookup2(&s->files, name, len, &v) ?
2083 upb_value_getconstptr(v) : NULL;
2084 }
2085
upb_symtab_filecount(const upb_symtab * s)2086 int upb_symtab_filecount(const upb_symtab *s) {
2087 return (int)upb_strtable_count(&s->files);
2088 }
2089
_upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,const upb_msglayout ** layouts,upb_status * status)2090 static const upb_filedef *_upb_symtab_addfile(
2091 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2092 const upb_msglayout **layouts, upb_status *status) {
2093 upb_arena *tmparena = upb_arena_new();
2094 upb_strtable addtab;
2095 upb_alloc *alloc = upb_arena_alloc(s->arena);
2096 upb_filedef *file = upb_malloc(alloc, sizeof(*file));
2097 bool ok;
2098 symtab_addctx ctx;
2099
2100 ctx.file = file;
2101 ctx.symtab = s;
2102 ctx.alloc = alloc;
2103 ctx.tmp = upb_arena_alloc(tmparena);
2104 ctx.addtab = &addtab;
2105 ctx.layouts = layouts;
2106 ctx.status = status;
2107
2108 ok = file && upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
2109 build_filedef(&ctx, file, file_proto) && upb_symtab_addtotabs(s, &ctx);
2110
2111 upb_arena_free(tmparena);
2112 return ok ? file : NULL;
2113 }
2114
upb_symtab_addfile(upb_symtab * s,const google_protobuf_FileDescriptorProto * file_proto,upb_status * status)2115 const upb_filedef *upb_symtab_addfile(
2116 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
2117 upb_status *status) {
2118 return _upb_symtab_addfile(s, file_proto, NULL, status);
2119 }
2120
2121 /* Include here since we want most of this file to be stdio-free. */
2122 #include <stdio.h>
2123
/* Loads a compiled-in descriptor (|init|) into |s|, loading its dependencies
 * first.
 *
 * Returns true immediately if a file named init->filename is already present
 * in s->files.  Otherwise each entry of the NULL-terminated init->deps array
 * is loaded recursively, the serialized descriptor is parsed into a scratch
 * arena, and the file is added using init->layouts.
 *
 * Returns false on failure, after printing the error to stderr. */
bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
  /* Since this function should never fail (it would indicate a bug in upb) we
   * print errors to stderr instead of returning error status to the user. */
  upb_def_init **deps = init->deps;
  google_protobuf_FileDescriptorProto *file;
  upb_arena *arena;
  upb_status status;

  upb_status_clear(&status);

  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
    return true;
  }

  arena = upb_arena_new();
  if (!arena) {
    /* Do not pass a NULL arena to the descriptor parser. */
    upb_status_seterrmsg(&status, "out of memory");
    goto err;
  }

  for (; *deps; deps++) {
    if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
  }

  file = google_protobuf_FileDescriptorProto_parse(
      init->descriptor.data, init->descriptor.size, arena);

  if (!file) {
    upb_status_seterrf(
        &status,
        "Failed to parse compiled-in descriptor for file '%s'. This should "
        "never happen.",
        init->filename);
    goto err;
  }

  if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;

  upb_arena_free(arena);
  return true;

err:
  fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
          upb_status_errmsg(&status));
  /* arena is NULL only when its creation failed above. */
  if (arena) {
    upb_arena_free(arena);
  }
  return false;
}
2167
2168 #undef CHK
2169 #undef CHK_OOM
2170