1 /*
2 ** protobuf decoder bytecode compiler
3 **
4 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
5 ** according to that specific schema and destination handlers.
6 **
7 ** Bytecode definition is in decoder.int.h.
8 */
9
10 #include <stdarg.h>
11 #include "upb/pb/decoder.int.h"
12 #include "upb/pb/varint.int.h"
13
14 #ifdef UPB_DUMP_BYTECODE
15 #include <stdio.h>
16 #endif
17
18 #include "upb/port_def.inc"
19
/* Number of slots in the compiler's local-label tables; valid label numbers
 * are below this value. */
#define MAXLABEL 5
/* Marks a label slot that holds no bytecode offset.  Parenthesized so the
 * negative constant stays a single operand wherever the macro is expanded. */
#define EMPTYLABEL (-1)
22
23 /* upb_pbdecodermethod ********************************************************/
24
/* Releases a decoder method: tears down its dispatch table, then frees the
 * method object itself. */
static void freemethod(upb_pbdecodermethod *method) {
  upb_inttable *dispatch_table = &method->dispatch;

  upb_inttable_uninit(dispatch_table);
  upb_gfree(method);
}
29
newmethod(const upb_handlers * dest_handlers,mgroup * group)30 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
31 mgroup *group) {
32 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
33 upb_byteshandler_init(&ret->input_handler_);
34
35 ret->group = group;
36 ret->dest_handlers_ = dest_handlers;
37 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
38
39 return ret;
40 }
41
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)42 const upb_handlers *upb_pbdecodermethod_desthandlers(
43 const upb_pbdecodermethod *m) {
44 return m->dest_handlers_;
45 }
46
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)47 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
48 const upb_pbdecodermethod *m) {
49 return &m->input_handler_;
50 }
51
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)52 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
53 return m->is_native_;
54 }
55
56
57 /* mgroup *********************************************************************/
58
/* Destroys a method group: frees every method stored in the group's method
 * table, then the table, the bytecode buffer and finally the group itself. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    freemethod(upb_value_getptr(entry));
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
71
newgroup(void)72 mgroup *newgroup(void) {
73 mgroup *g = upb_gmalloc(sizeof(*g));
74 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
75 g->bytecode = NULL;
76 g->bytecode_end = NULL;
77 return g;
78 }
79
80
81 /* bytecode compiler **********************************************************/
82
/* Data used only at compilation time. */
typedef struct {
  /* Method group whose bytecode buffer is being written. */
  mgroup *group;

  /* Current write position inside group->bytecode. */
  uint32_t *pc;
  /* Per label: bytecode offset of the most recent instruction holding an
   * unresolved forward reference to it, or EMPTYLABEL. */
  int fwd_labels[MAXLABEL];
  /* Per label: bytecode offset at which the label was last defined by
   * label(), or EMPTYLABEL if it has not been defined yet. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
94
newcompiler(mgroup * group,bool lazy)95 static compiler *newcompiler(mgroup *group, bool lazy) {
96 compiler *ret = upb_gmalloc(sizeof(*ret));
97 int i;
98
99 ret->group = group;
100 ret->lazy = lazy;
101 for (i = 0; i < MAXLABEL; i++) {
102 ret->fwd_labels[i] = EMPTYLABEL;
103 ret->back_labels[i] = EMPTYLABEL;
104 }
105 return ret;
106 }
107
/* Frees the compiler object only; the group it points at is not touched. */
static void freecompiler(compiler *c) {
  upb_gfree(c);
}
111
/* Number of 32-bit bytecode words needed to hold one pointer operand. */
const size_t ptr_words = sizeof(void *) / sizeof(uint32_t);
113
114 /* How many words an instruction is. */
instruction_len(uint32_t instr)115 static int instruction_len(uint32_t instr) {
116 switch (getop(instr)) {
117 case OP_SETDISPATCH: return 1 + ptr_words;
118 case OP_TAGN: return 3;
119 case OP_SETBIGGROUPNUM: return 2;
120 default: return 1;
121 }
122 }
123
op_has_longofs(int32_t instruction)124 bool op_has_longofs(int32_t instruction) {
125 switch (getop(instruction)) {
126 case OP_CALL:
127 case OP_BRANCH:
128 case OP_CHECKDELIM:
129 return true;
130 /* The "tag" instructions only have 8 bytes available for the jump target,
131 * but that is ok because these opcodes only require short jumps. */
132 case OP_TAG1:
133 case OP_TAG2:
134 case OP_TAGN:
135 return false;
136 default:
137 UPB_ASSERT(false);
138 return false;
139 }
140 }
141
/* Extracts the signed jump offset encoded in "instruction": a sign-extended
 * 8-bit field for short-offset opcodes, otherwise everything above the opcode
 * byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
149
/* Stores jump offset "ofs" into *instruction.  Long-offset opcodes keep only
 * the opcode byte and overwrite everything above it; short-offset opcodes
 * replace just bits 8..15 and keep the rest of the word. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  uint32_t word = *instruction;

  if (op_has_longofs(word)) {
    word = getop(word) | (uint32_t)ofs << 8;
  } else {
    word = (word & ~0xff00) | ((ofs & 0xff) << 8);
  }
  *instruction = word;
  /* Reading the offset back catches values that did not fit in the field. */
  UPB_ASSERT(getofs(*instruction) == ofs);
}
158
/* Returns the current write position as a word offset from the start of the
 * group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return (uint32_t)(c->pc - base);
}
162
163 /* Defines a local label at the current PC location. All previous forward
164 * references are updated to point to this location. The location is noted
165 * for any future backward references. */
label(compiler * c,unsigned int label)166 static void label(compiler *c, unsigned int label) {
167 int val;
168 uint32_t *codep;
169
170 UPB_ASSERT(label < MAXLABEL);
171 val = c->fwd_labels[label];
172 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
173 while (codep) {
174 int ofs = getofs(*codep);
175 setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep)));
176 codep = ofs ? codep + ofs : NULL;
177 }
178 c->fwd_labels[label] = EMPTYLABEL;
179 c->back_labels[label] = pcofs(c);
180 }
181
182 /* Creates a reference to a numbered label; either a forward reference
183 * (positive arg) or backward reference (negative arg). For forward references
184 * the value returned now is actually a "next" pointer into a linked list of all
185 * instructions that use this label and will be patched later when the label is
186 * defined with label().
187 *
188 * The returned value is the offset that should be written into the instruction.
189 */
labelref(compiler * c,int label)190 static int32_t labelref(compiler *c, int label) {
191 UPB_ASSERT(label < MAXLABEL);
192 if (label == LABEL_DISPATCH) {
193 /* No resolving required. */
194 return 0;
195 } else if (label < 0) {
196 /* Backward local label. Relative to the next instruction. */
197 uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode);
198 return c->back_labels[-label] - from;
199 } else {
200 /* Forward local label: prepend to (possibly-empty) linked list. */
201 int *lptr = &c->fwd_labels[label];
202 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
203 *lptr = pcofs(c);
204 return ret;
205 }
206 }
207
/* Appends one 32-bit word at the current PC and advances it.  When the buffer
 * is full it is first grown to twice its size (at least 64 words) and the PC
 * is re-based onto the reallocated storage. */
static void put32(compiler *c, uint32_t v) {
  mgroup *g = c->group;

  if (c->pc == g->bytecode_end) {
    int ofs = pcofs(c);
    size_t cur_words = g->bytecode_end - g->bytecode;
    size_t grown_words = UPB_MAX(cur_words * 2, 64);
    /* TODO(haberman): handle OOM. */
    g->bytecode = upb_grealloc(g->bytecode, cur_words * sizeof(uint32_t),
                               grown_words * sizeof(uint32_t));
    g->bytecode_end = g->bytecode + grown_words;
    c->pc = g->bytecode + ofs;
  }

  *c->pc = v;
  c->pc++;
}
222
/* Emits one instruction for opcode "op" at the current PC.  The variadic
 * arguments depend on the opcode:
 *   OP_SETDISPATCH          - void*: pointer stored in the following word(s).
 *   no-operand opcodes      - nothing is read.
 *   parse/seq/submsg/str/
 *   OP_PUSHTAGDELIM         - upb_selector_t: stored above the opcode byte.
 *   OP_SETBIGGROUPNUM       - int: stored in a second word.
 *   OP_CALL                 - upb_pbdecodermethod*: the callee.
 *   OP_CHECKDELIM/OP_BRANCH - int: label, resolved through labelref().
 *   OP_TAG1/OP_TAG2/OP_TAGN - int label, then uint64_t encoded tag.
 * An opcode not listed in the switch emits nothing. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      /* Low 32 bits first; the high word follows only if pointers are wider
       * than 32 bits. */
      put32(c, (uint32_t)ptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      /* The operand shares the instruction word with the opcode byte. */
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      /* Offset of the callee's code, relative to the word after this one. */
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      /* The tag occupies the upper 16 bits; the short jump offset set below
       * goes into bits 8..15. */
      uint32_t instruction = (uint32_t)(op | (tag << 16));
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      /* The first word carries upb_value_size(tag) above bit 16; the tag
       * itself follows as two words, low half first. */
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, (uint32_t)tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(ap);
}
310
311 #if defined(UPB_DUMP_BYTECODE)
312
/* Returns a string naming opcode "op" (the stringified OP_* identifier), or
 * "<unknown op>" when "op" matches none of the cases below.  Only built when
 * UPB_DUMP_BYTECODE is defined. */
const char *upb_pbdecoder_getopname(unsigned int op) {
/* OP(x) expands to a case label for OP_x that returns its name; T(x) does the
 * same for OP_PARSE_x. */
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
  }
  return "<unknown op>";
/* Only OP and T are undefined here; QUOTE, EXPAND_AND_QUOTE and OPNAME stay
 * defined for the rest of the file. */
#undef OP
#undef T
}
333
334 #endif
335
336 #ifdef UPB_DUMP_BYTECODE
337
dumpbc(uint32_t * p,uint32_t * end,FILE * f)338 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
339
340 uint32_t *begin = p;
341
342 while (p < end) {
343 fprintf(f, "%p %8tx", p, p - begin);
344 uint32_t instr = *p++;
345 uint8_t op = getop(instr);
346 fprintf(f, " %s", upb_pbdecoder_getopname(op));
347 switch ((opcode)op) {
348 case OP_SETDISPATCH: {
349 const upb_inttable *dispatch;
350 memcpy(&dispatch, p, sizeof(void*));
351 p += ptr_words;
352 const upb_pbdecodermethod *method =
353 (void *)((char *)dispatch -
354 offsetof(upb_pbdecodermethod, dispatch));
355 fprintf(f, " %s", upb_msgdef_fullname(
356 upb_handlers_msgdef(method->dest_handlers_)));
357 break;
358 }
359 case OP_DISPATCH:
360 case OP_STARTMSG:
361 case OP_ENDMSG:
362 case OP_PUSHLENDELIM:
363 case OP_POP:
364 case OP_SETDELIM:
365 case OP_HALT:
366 case OP_RET:
367 break;
368 case OP_PARSE_DOUBLE:
369 case OP_PARSE_FLOAT:
370 case OP_PARSE_INT64:
371 case OP_PARSE_UINT64:
372 case OP_PARSE_INT32:
373 case OP_PARSE_FIXED64:
374 case OP_PARSE_FIXED32:
375 case OP_PARSE_BOOL:
376 case OP_PARSE_UINT32:
377 case OP_PARSE_SFIXED32:
378 case OP_PARSE_SFIXED64:
379 case OP_PARSE_SINT32:
380 case OP_PARSE_SINT64:
381 case OP_STARTSEQ:
382 case OP_ENDSEQ:
383 case OP_STARTSUBMSG:
384 case OP_ENDSUBMSG:
385 case OP_STARTSTR:
386 case OP_STRING:
387 case OP_ENDSTR:
388 case OP_PUSHTAGDELIM:
389 fprintf(f, " %d", instr >> 8);
390 break;
391 case OP_SETBIGGROUPNUM:
392 fprintf(f, " %d", *p++);
393 break;
394 case OP_CHECKDELIM:
395 case OP_CALL:
396 case OP_BRANCH:
397 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
398 break;
399 case OP_TAG1:
400 case OP_TAG2: {
401 fprintf(f, " tag:0x%x", instr >> 16);
402 if (getofs(instr)) {
403 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
404 }
405 break;
406 }
407 case OP_TAGN: {
408 uint64_t tag = *p++;
409 tag |= (uint64_t)*p++ << 32;
410 fprintf(f, " tag:0x%llx", (long long)tag);
411 fprintf(f, " n:%d", instr >> 16);
412 if (getofs(instr)) {
413 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
414 }
415 break;
416 }
417 }
418 fputs("\n", f);
419 }
420 }
421
422 #endif
423
/* Builds the tag for field "f" with the given wire type (field number shifted
 * left by three, OR'd with the wire type) and returns the result of passing it
 * through upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  uint64_t varint = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(varint <= 0xffffffffff);
  return varint;
}
431
/* Emits a tag-check instruction for field "f" and "wire_type" that refers to
 * label "dest".  The opcode is chosen from upb_value_size() of the encoded
 * tag: OP_TAG1 for 1, OP_TAG2 for 2, OP_TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  int tag_op;

  switch (upb_value_size(tag)) {
    case 1:
      tag_op = OP_TAG1;
      break;
    case 2:
      tag_op = OP_TAG2;
      break;
    default:
      tag_op = OP_TAGN;
      break;
  }
  putop(c, tag_op, dest, tag);
}
447
getsel(const upb_fielddef * f,upb_handlertype_t type)448 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
449 upb_selector_t selector;
450 bool ok = upb_handlers_getselector(f, type, &selector);
451 UPB_ASSERT(ok);
452 return selector;
453 }
454
455 /* Takes an existing, primary dispatch table entry and repacks it with a
456 * different alternate wire type. Called when we are inserting a secondary
457 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)458 static uint64_t repack(uint64_t dispatch, int new_wt2) {
459 uint64_t ofs;
460 uint8_t wt1;
461 uint8_t old_wt2;
462 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
463 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
464 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
465 }
466
467 /* Marks the current bytecode position as the dispatch target for this message,
468 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)469 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
470 const upb_fielddef *f, int wire_type) {
471 /* Offset is relative to msg base. */
472 uint64_t ofs = pcofs(c) - method->code_base.ofs;
473 uint32_t fn = upb_fielddef_number(f);
474 upb_inttable *d = &method->dispatch;
475 upb_value v;
476 if (upb_inttable_remove(d, fn, &v)) {
477 /* TODO: prioritize based on packed setting in .proto file. */
478 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
479 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
480 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
481 } else {
482 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
483 upb_inttable_insert(d, fn, upb_value_uint64(val));
484 }
485 }
486
/* Emits the "push" instruction that opens a submessage frame for field "f":
 * OP_PUSHLENDELIM for MESSAGE-typed fields, otherwise OP_PUSHTAGDELIM carrying
 * the field number.  Field numbers of 1 << 24 or more do not go into the
 * instruction word; they are emitted through a following OP_SETBIGGROUPNUM. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
500
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)501 static upb_pbdecodermethod *find_submethod(const compiler *c,
502 const upb_pbdecodermethod *method,
503 const upb_fielddef *f) {
504 const upb_handlers *sub =
505 upb_handlers_getsubhandlers(method->dest_handlers_, f);
506 upb_value v;
507 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
508 ? upb_value_getptr(v)
509 : NULL;
510 }
511
/* Emits "op" with selector "sel", but only when handlers "h" actually have a
 * handler registered for that selector; otherwise nothing is emitted. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;
  }
  putop(c, op, sel);
}
518
519 /* Puts an opcode to call a callback, but only if a callback actually exists for
520 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)521 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
522 const upb_fielddef *f, upb_handlertype_t type) {
523 putsel(c, op, getsel(f, type), h);
524 }
525
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)526 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
527 if (!upb_fielddef_lazy(f))
528 return false;
529
530 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
531 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
532 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
533 }
534
535
536 /* bytecode compiler code generation ******************************************/
537
/* Symbolic names for our local labels.  Each must be below MAXLABEL.  A
 * positive value passed to putop() is a forward reference; the negated value
 * is a backward reference to the label's most recent definition. */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
543
544 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)545 static void generate_msgfield(compiler *c, const upb_fielddef *f,
546 upb_pbdecodermethod *method) {
547 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
548 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
549 int wire_type;
550
551 if (!sub_m) {
552 /* Don't emit any code for this field at all; it will be parsed as an
553 * unknown field.
554 *
555 * TODO(haberman): we should change this to parse it as a string field
556 * instead. It will probably be faster, but more importantly, once we
557 * start vending unknown fields, a field shouldn't be treated as unknown
558 * just because it doesn't have subhandlers registered. */
559 return;
560 }
561
562 label(c, LABEL_FIELD);
563
564 wire_type =
565 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
566 ? UPB_WIRE_TYPE_DELIMITED
567 : UPB_WIRE_TYPE_START_GROUP;
568
569 if (upb_fielddef_isseq(f)) {
570 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
571 putchecktag(c, f, wire_type, LABEL_DISPATCH);
572 dispatchtarget(c, method, f, wire_type);
573 putop(c, OP_PUSHTAGDELIM, 0);
574 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
575 label(c, LABEL_LOOPSTART);
576 putpush(c, f);
577 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
578 putop(c, OP_CALL, sub_m);
579 putop(c, OP_POP);
580 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
581 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
582 putop(c, OP_SETDELIM);
583 }
584 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
585 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
586 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
587 label(c, LABEL_LOOPBREAK);
588 putop(c, OP_POP);
589 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
590 } else {
591 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
592 putchecktag(c, f, wire_type, LABEL_DISPATCH);
593 dispatchtarget(c, method, f, wire_type);
594 putpush(c, f);
595 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
596 putop(c, OP_CALL, sub_m);
597 putop(c, OP_POP);
598 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
599 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
600 putop(c, OP_SETDELIM);
601 }
602 }
603 }
604
605 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)606 static void generate_delimfield(compiler *c, const upb_fielddef *f,
607 upb_pbdecodermethod *method) {
608 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
609
610 label(c, LABEL_FIELD);
611 if (upb_fielddef_isseq(f)) {
612 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
613 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
614 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
615 putop(c, OP_PUSHTAGDELIM, 0);
616 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
617 label(c, LABEL_LOOPSTART);
618 putop(c, OP_PUSHLENDELIM);
619 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
620 /* Need to emit even if no handler to skip past the string. */
621 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
622 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
623 putop(c, OP_POP);
624 putop(c, OP_SETDELIM);
625 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
626 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
627 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
628 label(c, LABEL_LOOPBREAK);
629 putop(c, OP_POP);
630 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
631 } else {
632 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
633 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
634 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
635 putop(c, OP_PUSHLENDELIM);
636 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
637 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
638 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
639 putop(c, OP_POP);
640 putop(c, OP_SETDELIM);
641 }
642 }
643
644 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)645 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
646 upb_pbdecodermethod *method) {
647 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
648 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
649 opcode parse_type;
650 upb_selector_t sel;
651 int wire_type;
652
653 label(c, LABEL_FIELD);
654
655 /* From a decoding perspective, ENUM is the same as INT32. */
656 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
657 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
658
659 parse_type = (opcode)descriptor_type;
660
661 /* TODO(haberman): generate packed or non-packed first depending on "packed"
662 * setting in the fielddef. This will favor (in speed) whichever was
663 * specified. */
664
665 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
666 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
667 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
668 if (upb_fielddef_isseq(f)) {
669 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
670 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
671 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
672 putop(c, OP_PUSHLENDELIM);
673 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
674 label(c, LABEL_LOOPSTART);
675 putop(c, parse_type, sel);
676 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
677 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
678 dispatchtarget(c, method, f, wire_type);
679 putop(c, OP_PUSHTAGDELIM, 0);
680 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
681 label(c, LABEL_LOOPSTART);
682 putop(c, parse_type, sel);
683 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
684 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
685 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
686 label(c, LABEL_LOOPBREAK);
687 putop(c, OP_POP); /* Packed and non-packed join. */
688 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
689 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
690 } else {
691 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
692 putchecktag(c, f, wire_type, LABEL_DISPATCH);
693 dispatchtarget(c, method, f, wire_type);
694 putop(c, parse_type, sel);
695 }
696 }
697
698 /* Adds bytecode for parsing the given message to the given decoderplan,
699 * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)700 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
701 const upb_handlers *h;
702 const upb_msgdef *md;
703 uint32_t* start_pc;
704 upb_msg_field_iter i;
705 upb_value val;
706
707 UPB_ASSERT(method);
708
709 /* Clear all entries in the dispatch table. */
710 upb_inttable_uninit(&method->dispatch);
711 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
712
713 h = upb_pbdecodermethod_desthandlers(method);
714 md = upb_handlers_msgdef(h);
715
716 method->code_base.ofs = pcofs(c);
717 putop(c, OP_SETDISPATCH, &method->dispatch);
718 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
719 label(c, LABEL_FIELD);
720 start_pc = c->pc;
721 for(upb_msg_field_begin(&i, md);
722 !upb_msg_field_done(&i);
723 upb_msg_field_next(&i)) {
724 const upb_fielddef *f = upb_msg_iter_field(&i);
725 upb_fieldtype_t type = upb_fielddef_type(f);
726
727 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
728 generate_msgfield(c, f, method);
729 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
730 type == UPB_TYPE_MESSAGE) {
731 generate_delimfield(c, f, method);
732 } else {
733 generate_primitivefield(c, f, method);
734 }
735 }
736
737 /* If there were no fields, or if no handlers were defined, we need to
738 * generate a non-empty loop body so that we can at least dispatch for unknown
739 * fields and check for the end of the message. */
740 if (c->pc == start_pc) {
741 /* Check for end-of-message. */
742 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
743 /* Unconditionally dispatch. */
744 putop(c, OP_DISPATCH, 0);
745 }
746
747 /* For now we just loop back to the last field of the message (or if none,
748 * the DISPATCH opcode for the message). */
749 putop(c, OP_BRANCH, -LABEL_FIELD);
750
751 /* Insert both a label and a dispatch table entry for this end-of-msg. */
752 label(c, LABEL_ENDMSG);
753 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
754 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
755
756 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
757 putop(c, OP_RET);
758
759 upb_inttable_compact(&method->dispatch);
760 }
761
762 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
763 * Returns the method for these handlers.
764 *
765 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)766 static void find_methods(compiler *c, const upb_handlers *h) {
767 upb_value v;
768 upb_msg_field_iter i;
769 const upb_msgdef *md;
770 upb_pbdecodermethod *method;
771
772 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
773 return;
774
775 method = newmethod(h, c->group);
776 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
777
778 /* Find submethods. */
779 md = upb_handlers_msgdef(h);
780 for(upb_msg_field_begin(&i, md);
781 !upb_msg_field_done(&i);
782 upb_msg_field_next(&i)) {
783 const upb_fielddef *f = upb_msg_iter_field(&i);
784 const upb_handlers *sub_h;
785 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
786 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
787 /* We only generate a decoder method for submessages with handlers.
788 * Others will be parsed as unknown fields. */
789 find_methods(c, sub_h);
790 }
791 }
792 }
793
794 /* (Re-)compile bytecode for all messages in "msgs."
795 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)796 static void compile_methods(compiler *c) {
797 upb_inttable_iter i;
798
799 /* Start over at the beginning of the bytecode. */
800 c->pc = c->group->bytecode;
801
802 upb_inttable_begin(&i, &c->group->methods);
803 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
804 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
805 compile_method(c, method);
806 }
807 }
808
/* Finalizes every method in the group once compilation is done: code_base is
 * converted from a bytecode offset into an absolute pointer, and the method's
 * input byte-handler is wired to the bytecode decoder callbacks. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &m->input_handler_;

    /* The same storage holds the offset and the pointer; after this store
     * only the pointer is meaningful. */
    m->code_base.ptr = g->bytecode + m->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 m->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, m);
  }
}
823
824
825 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
826 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool lazy)827 const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
828 mgroup *g;
829 compiler *c;
830
831 g = newgroup();
832 c = newcompiler(g, lazy);
833 find_methods(c, dest);
834
835 /* We compile in two passes:
836 * 1. all messages are assigned relative offsets from the beginning of the
837 * bytecode (saved in method->code_base).
838 * 2. forwards OP_CALL instructions can be correctly linked since message
839 * offsets have been previously assigned.
840 *
841 * Could avoid the second pass by linking OP_CALL instructions somehow. */
842 compile_methods(c);
843 compile_methods(c);
844 g->bytecode_end = c->pc;
845 freecompiler(c);
846
847 #ifdef UPB_DUMP_BYTECODE
848 {
849 FILE *f = fopen("/tmp/upb-bytecode", "w");
850 UPB_ASSERT(f);
851 dumpbc(g->bytecode, g->bytecode_end, stderr);
852 dumpbc(g->bytecode, g->bytecode_end, f);
853 fclose(f);
854
855 f = fopen("/tmp/upb-bytecode.bin", "wb");
856 UPB_ASSERT(f);
857 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
858 fclose(f);
859 }
860 #endif
861
862 set_bytecode_handlers(g);
863 return g;
864 }
865
866
867 /* upb_pbcodecache ************************************************************/
868
upb_pbcodecache_new(upb_handlercache * dest)869 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
870 upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
871
872 if (!c) return NULL;
873
874 c->dest = dest;
875 c->lazy = false;
876
877 c->arena = upb_arena_new();
878 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
879
880 return c;
881 }
882
/* Destroys a code cache: every cached method group is freed, followed by the
 * group table, the arena and the cache object itself. */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    /* Groups are stored as const pointers; the cast drops the qualifier so
     * the group can be freed. */
    freegroup((void*)upb_value_getconstptr(entry));
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
896
/* Sets whether fields marked lazy are parsed lazily by groups compiled from
 * now on.  Must be called before any group has been cached; the assert
 * enforces that the group table is still empty. */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  UPB_ASSERT(upb_inttable_count(&c->groups) == 0);

  c->lazy = lazy;
}
901
upb_pbcodecache_get(upb_pbcodecache * c,const upb_msgdef * md)902 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
903 const upb_msgdef *md) {
904 upb_value v;
905 bool ok;
906 const upb_handlers *h;
907 const mgroup *g;
908
909 h = upb_handlercache_get(c->dest, md);
910 if (upb_inttable_lookupptr(&c->groups, md, &v)) {
911 g = upb_value_getconstptr(v);
912 } else {
913 g = mgroup_new(h, c->lazy);
914 ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
915 UPB_ASSUME(ok);
916 }
917
918 ok = upb_inttable_lookupptr(&g->methods, h, &v);
919 UPB_ASSUME(ok);
920 return upb_value_getptr(v);
921 }
922