1 /*
2 ** protobuf decoder bytecode compiler
3 **
4 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
5 ** according to that specific schema and destination handlers.
6 **
7 ** Bytecode definition is in decoder.int.h.
8 */
9
10 #include <stdarg.h>
11 #include "upb/pb/decoder.int.h"
12 #include "upb/pb/varint.int.h"
13
14 #ifdef UPB_DUMP_BYTECODE
15 #include <stdio.h>
16 #endif
17
18 #include "upb/port_def.inc"
19
/* Number of slots in each of the compiler's local-label tables; label() and
 * labelref() assert that label numbers stay below this value. */
#define MAXLABEL 5
/* Sentinel stored in a label slot that currently holds no bytecode offset. */
#define EMPTYLABEL -1
22
23 /* upb_pbdecodermethod ********************************************************/
24
/* Destroys a decoder method: tears down its dispatch table and then releases
 * the method object itself. */
static void freemethod(upb_pbdecodermethod *method) {
  upb_inttable *dispatch = &method->dispatch;

  upb_inttable_uninit(dispatch);
  upb_gfree(method);
}
29
newmethod(const upb_handlers * dest_handlers,mgroup * group)30 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
31 mgroup *group) {
32 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
33 upb_byteshandler_init(&ret->input_handler_);
34
35 ret->group = group;
36 ret->dest_handlers_ = dest_handlers;
37 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
38
39 return ret;
40 }
41
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)42 const upb_handlers *upb_pbdecodermethod_desthandlers(
43 const upb_pbdecodermethod *m) {
44 return m->dest_handlers_;
45 }
46
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)47 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
48 const upb_pbdecodermethod *m) {
49 return &m->input_handler_;
50 }
51
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)52 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
53 return m->is_native_;
54 }
55
56
57 /* mgroup *********************************************************************/
58
/* Releases a method group: every method stored in its table, the table
 * itself, the bytecode buffer, and finally the group object. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    freemethod(upb_value_getptr(entry));
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
71
newgroup(void)72 mgroup *newgroup(void) {
73 mgroup *g = upb_gmalloc(sizeof(*g));
74 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
75 g->bytecode = NULL;
76 g->bytecode_end = NULL;
77 return g;
78 }
79
80
81 /* bytecode compiler **********************************************************/
82
/* Data used only at compilation time. */
typedef struct {
  /* Group whose bytecode buffer and method table are being filled in. */
  mgroup *group;

  /* Write position: put32() stores the next word here and advances it. */
  uint32_t *pc;
  /* Per label: bytecode offset of the most recently emitted instruction that
   * refers forward to the (not yet defined) label, or EMPTYLABEL.  Earlier
   * references are chained through the instructions' offset fields until
   * label() patches them. */
  int fwd_labels[MAXLABEL];
  /* Per label: bytecode offset at which the label was last defined by
   * label(), or EMPTYLABEL.  Used to resolve backward references. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
94
newcompiler(mgroup * group,bool lazy)95 static compiler *newcompiler(mgroup *group, bool lazy) {
96 compiler *ret = upb_gmalloc(sizeof(*ret));
97 int i;
98
99 ret->group = group;
100 ret->lazy = lazy;
101 for (i = 0; i < MAXLABEL; i++) {
102 ret->fwd_labels[i] = EMPTYLABEL;
103 ret->back_labels[i] = EMPTYLABEL;
104 }
105 return ret;
106 }
107
/* Releases the compilation state.  The group it points at is not touched. */
static void freecompiler(compiler *c) {
  upb_gfree(c);
}
111
/* Number of 32-bit bytecode words occupied by one pointer; this is the size
 * of the operand that follows an OP_SETDISPATCH instruction. */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
113
114 /* How many words an instruction is. */
instruction_len(uint32_t instr)115 static int instruction_len(uint32_t instr) {
116 switch (getop(instr)) {
117 case OP_SETDISPATCH: return 1 + ptr_words;
118 case OP_TAGN: return 3;
119 case OP_SETBIGGROUPNUM: return 2;
120 default: return 1;
121 }
122 }
123
op_has_longofs(int32_t instruction)124 bool op_has_longofs(int32_t instruction) {
125 switch (getop(instruction)) {
126 case OP_CALL:
127 case OP_BRANCH:
128 case OP_CHECKDELIM:
129 return true;
130 /* The "tag" instructions only have 8 bytes available for the jump target,
131 * but that is ok because these opcodes only require short jumps. */
132 case OP_TAG1:
133 case OP_TAG2:
134 case OP_TAGN:
135 return false;
136 default:
137 UPB_ASSERT(false);
138 return false;
139 }
140 }
141
/* Extracts the signed jump offset stored in "instruction": a sign-extended
 * single byte for short-offset opcodes, otherwise all bits above the opcode
 * byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
149
/* Stores the jump offset "ofs" into *instruction.  For long-offset opcodes
 * the word is rebuilt from the opcode and the offset; for short-offset
 * opcodes only the offset byte is replaced and the remaining bits are kept.
 * Asserts that the offset survives a round trip through getofs(). */
static void setofs(uint32_t *instruction, int32_t ofs) {
  const uint32_t old = *instruction;
  uint32_t patched;

  if (op_has_longofs(old)) {
    patched = getop(old) | (uint32_t)ofs << 8;
  } else {
    patched = (old & ~0xff00) | ((ofs & 0xff) << 8);
  }
  *instruction = patched;
  UPB_ASSERT(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */
}
158
/* Returns the current write position as a word offset from the start of the
 * group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return c->pc - base;
}
160
161 /* Defines a local label at the current PC location. All previous forward
162 * references are updated to point to this location. The location is noted
163 * for any future backward references. */
label(compiler * c,unsigned int label)164 static void label(compiler *c, unsigned int label) {
165 int val;
166 uint32_t *codep;
167
168 UPB_ASSERT(label < MAXLABEL);
169 val = c->fwd_labels[label];
170 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
171 while (codep) {
172 int ofs = getofs(*codep);
173 setofs(codep, c->pc - codep - instruction_len(*codep));
174 codep = ofs ? codep + ofs : NULL;
175 }
176 c->fwd_labels[label] = EMPTYLABEL;
177 c->back_labels[label] = pcofs(c);
178 }
179
180 /* Creates a reference to a numbered label; either a forward reference
181 * (positive arg) or backward reference (negative arg). For forward references
182 * the value returned now is actually a "next" pointer into a linked list of all
183 * instructions that use this label and will be patched later when the label is
184 * defined with label().
185 *
186 * The returned value is the offset that should be written into the instruction.
187 */
labelref(compiler * c,int label)188 static int32_t labelref(compiler *c, int label) {
189 UPB_ASSERT(label < MAXLABEL);
190 if (label == LABEL_DISPATCH) {
191 /* No resolving required. */
192 return 0;
193 } else if (label < 0) {
194 /* Backward local label. Relative to the next instruction. */
195 uint32_t from = (c->pc + 1) - c->group->bytecode;
196 return c->back_labels[-label] - from;
197 } else {
198 /* Forward local label: prepend to (possibly-empty) linked list. */
199 int *lptr = &c->fwd_labels[label];
200 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
201 *lptr = pcofs(c);
202 return ret;
203 }
204 }
205
/* Appends one 32-bit word at the write position, growing the group's bytecode
 * buffer first when the position has reached its end.  Growth doubles the
 * buffer, with a minimum of 64 words, and re-bases pc into the new buffer. */
static void put32(compiler *c, uint32_t v) {
  mgroup *g = c->group;

  if (c->pc == g->bytecode_end) {
    const int used = pcofs(c);
    const size_t old_words = g->bytecode_end - g->bytecode;
    const size_t new_words = UPB_MAX(old_words * 2, 64);

    /* TODO(haberman): handle OOM. */
    g->bytecode = upb_grealloc(g->bytecode, old_words * sizeof(uint32_t),
                               new_words * sizeof(uint32_t));
    g->bytecode_end = g->bytecode + new_words;
    c->pc = g->bytecode + used;
  }

  *c->pc = v;
  c->pc++;
}
220
/* Appends one instruction, plus any operand words it needs, at the current
 * PC.  The variadic arguments depend on "op":
 *
 *   OP_SETDISPATCH              void* (written as one pointer-sized operand)
 *   opcodes taking a selector   upb_selector_t (packed above the opcode byte)
 *   OP_SETBIGGROUPNUM           int (written as a second word)
 *   OP_CALL                     upb_pbdecodermethod* (offset of its code
 *                               relative to the next instruction)
 *   OP_CHECKDELIM, OP_BRANCH    int label (see labelref())
 *   OP_TAG1, OP_TAG2, OP_TAGN   int label, uint64_t encoded tag
 *   remaining handled opcodes   no arguments
 *
 * Passing an opcode that is not handled here is a programming error and trips
 * an assertion instead of silently emitting nothing. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, ptr);
      /* On 64-bit targets the pointer needs a second word. */
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      /* Target is relative to the instruction following this one. */
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (tag << 16);
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      /* The tag itself follows as two words, low half first. */
      put32(c, tag);
      put32(c, tag >> 32);
      break;
    }
    default:
      /* Unknown opcode: nothing sensible can be emitted. */
      UPB_ASSERT(false);
      break;
  }

  va_end(ap);
}
308
#if defined(UPB_DUMP_BYTECODE)

/* Returns the symbolic name of opcode "op" (for example "OP_STARTMSG") as a
 * string literal, or "<unknown op>" when the value matches no case below.
 * Only compiled when UPB_DUMP_BYTECODE is defined.
 *
 * The helper macros exist only to build the switch and are all undefined
 * again before the function ends, so none of them leaks into later code. */
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
  }
  return "<unknown op>";
#undef OP
#undef T
#undef OPNAME
#undef EXPAND_AND_QUOTE
#undef QUOTE
}

#endif
333
#ifdef UPB_DUMP_BYTECODE

/* Writes a human-readable listing of the bytecode in [p, end) to "f", one
 * instruction per line: address, word offset from the start, opcode name and
 * then the operands appropriate for that opcode.  Jump targets are printed as
 * word offsets from the start of the listing.  Only compiled when
 * UPB_DUMP_BYTECODE is defined.
 *
 * Each printf argument is converted to exactly the type its conversion
 * specifier requires (%p takes void*, %u/%x/%llx take unsigned values). */
static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {

  uint32_t *begin = p;

  while (p < end) {
    fprintf(f, "%p %8tx", (void *)p, p - begin);
    uint32_t instr = *p++;
    uint8_t op = getop(instr);
    fprintf(f, " %s", upb_pbdecoder_getopname(op));
    switch ((opcode)op) {
      case OP_SETDISPATCH: {
        const upb_inttable *dispatch;
        memcpy(&dispatch, p, sizeof(void*));
        p += ptr_words;
        /* The operand points at the dispatch table embedded in a method;
         * step back to the enclosing method to find its message name. */
        const upb_pbdecodermethod *method =
            (void *)((char *)dispatch -
                     offsetof(upb_pbdecodermethod, dispatch));
        fprintf(f, " %s", upb_msgdef_fullname(
                              upb_handlers_msgdef(method->dest_handlers_)));
        break;
      }
      case OP_DISPATCH:
      case OP_STARTMSG:
      case OP_ENDMSG:
      case OP_PUSHLENDELIM:
      case OP_POP:
      case OP_SETDELIM:
      case OP_HALT:
      case OP_RET:
        break;
      case OP_PARSE_DOUBLE:
      case OP_PARSE_FLOAT:
      case OP_PARSE_INT64:
      case OP_PARSE_UINT64:
      case OP_PARSE_INT32:
      case OP_PARSE_FIXED64:
      case OP_PARSE_FIXED32:
      case OP_PARSE_BOOL:
      case OP_PARSE_UINT32:
      case OP_PARSE_SFIXED32:
      case OP_PARSE_SFIXED64:
      case OP_PARSE_SINT32:
      case OP_PARSE_SINT64:
      case OP_STARTSEQ:
      case OP_ENDSEQ:
      case OP_STARTSUBMSG:
      case OP_ENDSUBMSG:
      case OP_STARTSTR:
      case OP_STRING:
      case OP_ENDSTR:
      case OP_PUSHTAGDELIM:
        fprintf(f, " %u", (unsigned)(instr >> 8));
        break;
      case OP_SETBIGGROUPNUM:
        fprintf(f, " %u", (unsigned)*p++);
        break;
      case OP_CHECKDELIM:
      case OP_CALL:
      case OP_BRANCH:
        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        break;
      case OP_TAG1:
      case OP_TAG2: {
        fprintf(f, " tag:0x%x", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
      case OP_TAGN: {
        /* The tag is stored in the two following words, low half first. */
        uint64_t tag = *p++;
        tag |= (uint64_t)*p++ << 32;
        fprintf(f, " tag:0x%llx", (unsigned long long)tag);
        fprintf(f, " n:%u", (unsigned)(instr >> 16));
        if (getofs(instr)) {
          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
        }
        break;
      }
    }
    fputs("\n", f);
  }
}

#endif
421
/* Builds the tag for field "f" with the given wire type (field number shifted
 * left by three, or-ed with the wire type) and returns the result of
 * upb_vencode32() on it. */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  const uint32_t field_number = upb_fielddef_number(f);
  const uint32_t tag = (field_number << 3) | wire_type;
  const uint64_t encoded = upb_vencode32(tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
429
/* Emits a tag-check instruction for field "f" with the given wire type,
 * passing "dest" as the label operand.  The opcode is chosen from the size
 * reported by upb_value_size() for the encoded tag: OP_TAG1 for 1, OP_TAG2
 * for 2, OP_TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  const uint64_t tag = get_encoded_tag(f, wire_type);
  int op;

  switch (upb_value_size(tag)) {
    case 1:
      op = OP_TAG1;
      break;
    case 2:
      op = OP_TAG2;
      break;
    default:
      op = OP_TAGN;
      break;
  }
  putop(c, op, dest, tag);
}
445
getsel(const upb_fielddef * f,upb_handlertype_t type)446 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
447 upb_selector_t selector;
448 bool ok = upb_handlers_getselector(f, type, &selector);
449 UPB_ASSERT(ok);
450 return selector;
451 }
452
453 /* Takes an existing, primary dispatch table entry and repacks it with a
454 * different alternate wire type. Called when we are inserting a secondary
455 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)456 static uint64_t repack(uint64_t dispatch, int new_wt2) {
457 uint64_t ofs;
458 uint8_t wt1;
459 uint8_t old_wt2;
460 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
461 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
462 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
463 }
464
465 /* Marks the current bytecode position as the dispatch target for this message,
466 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)467 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
468 const upb_fielddef *f, int wire_type) {
469 /* Offset is relative to msg base. */
470 uint64_t ofs = pcofs(c) - method->code_base.ofs;
471 uint32_t fn = upb_fielddef_number(f);
472 upb_inttable *d = &method->dispatch;
473 upb_value v;
474 if (upb_inttable_remove(d, fn, &v)) {
475 /* TODO: prioritize based on packed setting in .proto file. */
476 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
477 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
478 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
479 } else {
480 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
481 upb_inttable_insert(d, fn, upb_value_uint64(val));
482 }
483 }
484
/* Emits the "push" instruction that precedes a submessage of field "f":
 * OP_PUSHLENDELIM for message-typed fields, otherwise OP_PUSHTAGDELIM carrying
 * the field number.  Field numbers of 1 << 24 or more do not fit in the
 * instruction word, so they are emitted as OP_PUSHTAGDELIM 0 followed by
 * OP_SETBIGGROUPNUM with the number in its own word. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fn;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fn = upb_fielddef_number(f);
  if (fn < 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, fn);
  } else {
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_SETBIGGROUPNUM, fn);
  }
}
498
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)499 static upb_pbdecodermethod *find_submethod(const compiler *c,
500 const upb_pbdecodermethod *method,
501 const upb_fielddef *f) {
502 const upb_handlers *sub =
503 upb_handlers_getsubhandlers(method->dest_handlers_, f);
504 upb_value v;
505 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
506 ? upb_value_getptr(v)
507 : NULL;
508 }
509
/* Emits "op" with selector "sel", but only when "h" has a handler registered
 * for that selector; otherwise emits nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;
  }
  putop(c, op, sel);
}
516
517 /* Puts an opcode to call a callback, but only if a callback actually exists for
518 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)519 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
520 const upb_fielddef *f, upb_handlertype_t type) {
521 putsel(c, op, getsel(f, type), h);
522 }
523
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)524 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
525 if (!upb_fielddef_lazy(f))
526 return false;
527
528 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
529 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
530 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
531 }
532
533
534 /* bytecode compiler code generation ******************************************/
535
/* Symbolic names for our local labels.  Each value indexes the compiler's
 * label tables and therefore has to stay below MAXLABEL; a negated value
 * passed to putop() requests a backward reference. */
#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
541
542 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)543 static void generate_msgfield(compiler *c, const upb_fielddef *f,
544 upb_pbdecodermethod *method) {
545 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
546 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
547 int wire_type;
548
549 if (!sub_m) {
550 /* Don't emit any code for this field at all; it will be parsed as an
551 * unknown field.
552 *
553 * TODO(haberman): we should change this to parse it as a string field
554 * instead. It will probably be faster, but more importantly, once we
555 * start vending unknown fields, a field shouldn't be treated as unknown
556 * just because it doesn't have subhandlers registered. */
557 return;
558 }
559
560 label(c, LABEL_FIELD);
561
562 wire_type =
563 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
564 ? UPB_WIRE_TYPE_DELIMITED
565 : UPB_WIRE_TYPE_START_GROUP;
566
567 if (upb_fielddef_isseq(f)) {
568 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
569 putchecktag(c, f, wire_type, LABEL_DISPATCH);
570 dispatchtarget(c, method, f, wire_type);
571 putop(c, OP_PUSHTAGDELIM, 0);
572 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
573 label(c, LABEL_LOOPSTART);
574 putpush(c, f);
575 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
576 putop(c, OP_CALL, sub_m);
577 putop(c, OP_POP);
578 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
579 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
580 putop(c, OP_SETDELIM);
581 }
582 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
583 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
584 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
585 label(c, LABEL_LOOPBREAK);
586 putop(c, OP_POP);
587 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
588 } else {
589 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
590 putchecktag(c, f, wire_type, LABEL_DISPATCH);
591 dispatchtarget(c, method, f, wire_type);
592 putpush(c, f);
593 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
594 putop(c, OP_CALL, sub_m);
595 putop(c, OP_POP);
596 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
597 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
598 putop(c, OP_SETDELIM);
599 }
600 }
601 }
602
603 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)604 static void generate_delimfield(compiler *c, const upb_fielddef *f,
605 upb_pbdecodermethod *method) {
606 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
607
608 label(c, LABEL_FIELD);
609 if (upb_fielddef_isseq(f)) {
610 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
611 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
612 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
613 putop(c, OP_PUSHTAGDELIM, 0);
614 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
615 label(c, LABEL_LOOPSTART);
616 putop(c, OP_PUSHLENDELIM);
617 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
618 /* Need to emit even if no handler to skip past the string. */
619 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
620 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
621 putop(c, OP_POP);
622 putop(c, OP_SETDELIM);
623 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
624 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
625 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
626 label(c, LABEL_LOOPBREAK);
627 putop(c, OP_POP);
628 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
629 } else {
630 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
631 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
632 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
633 putop(c, OP_PUSHLENDELIM);
634 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
635 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
636 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
637 putop(c, OP_POP);
638 putop(c, OP_SETDELIM);
639 }
640 }
641
642 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)643 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
644 upb_pbdecodermethod *method) {
645 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
646 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
647 opcode parse_type;
648 upb_selector_t sel;
649 int wire_type;
650
651 label(c, LABEL_FIELD);
652
653 /* From a decoding perspective, ENUM is the same as INT32. */
654 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
655 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
656
657 parse_type = (opcode)descriptor_type;
658
659 /* TODO(haberman): generate packed or non-packed first depending on "packed"
660 * setting in the fielddef. This will favor (in speed) whichever was
661 * specified. */
662
663 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
664 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
665 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
666 if (upb_fielddef_isseq(f)) {
667 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
668 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
669 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
670 putop(c, OP_PUSHLENDELIM);
671 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
672 label(c, LABEL_LOOPSTART);
673 putop(c, parse_type, sel);
674 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
675 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
676 dispatchtarget(c, method, f, wire_type);
677 putop(c, OP_PUSHTAGDELIM, 0);
678 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
679 label(c, LABEL_LOOPSTART);
680 putop(c, parse_type, sel);
681 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
682 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
683 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
684 label(c, LABEL_LOOPBREAK);
685 putop(c, OP_POP); /* Packed and non-packed join. */
686 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
687 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
688 } else {
689 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
690 putchecktag(c, f, wire_type, LABEL_DISPATCH);
691 dispatchtarget(c, method, f, wire_type);
692 putop(c, parse_type, sel);
693 }
694 }
695
696 /* Adds bytecode for parsing the given message to the given decoderplan,
697 * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)698 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
699 const upb_handlers *h;
700 const upb_msgdef *md;
701 uint32_t* start_pc;
702 upb_msg_field_iter i;
703 upb_value val;
704
705 UPB_ASSERT(method);
706
707 /* Clear all entries in the dispatch table. */
708 upb_inttable_uninit(&method->dispatch);
709 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
710
711 h = upb_pbdecodermethod_desthandlers(method);
712 md = upb_handlers_msgdef(h);
713
714 method->code_base.ofs = pcofs(c);
715 putop(c, OP_SETDISPATCH, &method->dispatch);
716 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
717 label(c, LABEL_FIELD);
718 start_pc = c->pc;
719 for(upb_msg_field_begin(&i, md);
720 !upb_msg_field_done(&i);
721 upb_msg_field_next(&i)) {
722 const upb_fielddef *f = upb_msg_iter_field(&i);
723 upb_fieldtype_t type = upb_fielddef_type(f);
724
725 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
726 generate_msgfield(c, f, method);
727 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
728 type == UPB_TYPE_MESSAGE) {
729 generate_delimfield(c, f, method);
730 } else {
731 generate_primitivefield(c, f, method);
732 }
733 }
734
735 /* If there were no fields, or if no handlers were defined, we need to
736 * generate a non-empty loop body so that we can at least dispatch for unknown
737 * fields and check for the end of the message. */
738 if (c->pc == start_pc) {
739 /* Check for end-of-message. */
740 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
741 /* Unconditionally dispatch. */
742 putop(c, OP_DISPATCH, 0);
743 }
744
745 /* For now we just loop back to the last field of the message (or if none,
746 * the DISPATCH opcode for the message). */
747 putop(c, OP_BRANCH, -LABEL_FIELD);
748
749 /* Insert both a label and a dispatch table entry for this end-of-msg. */
750 label(c, LABEL_ENDMSG);
751 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
752 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
753
754 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
755 putop(c, OP_RET);
756
757 upb_inttable_compact(&method->dispatch);
758 }
759
760 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
761 * Returns the method for these handlers.
762 *
763 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)764 static void find_methods(compiler *c, const upb_handlers *h) {
765 upb_value v;
766 upb_msg_field_iter i;
767 const upb_msgdef *md;
768 upb_pbdecodermethod *method;
769
770 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
771 return;
772
773 method = newmethod(h, c->group);
774 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
775
776 /* Find submethods. */
777 md = upb_handlers_msgdef(h);
778 for(upb_msg_field_begin(&i, md);
779 !upb_msg_field_done(&i);
780 upb_msg_field_next(&i)) {
781 const upb_fielddef *f = upb_msg_iter_field(&i);
782 const upb_handlers *sub_h;
783 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
784 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
785 /* We only generate a decoder method for submessages with handlers.
786 * Others will be parsed as unknown fields. */
787 find_methods(c, sub_h);
788 }
789 }
790 }
791
792 /* (Re-)compile bytecode for all messages in "msgs."
793 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)794 static void compile_methods(compiler *c) {
795 upb_inttable_iter i;
796
797 /* Start over at the beginning of the bytecode. */
798 c->pc = c->group->bytecode;
799
800 upb_inttable_begin(&i, &c->group->methods);
801 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
802 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
803 compile_method(c, method);
804 }
805 }
806
/* Finalizes every method in "g" once its bytecode is in place: converts the
 * method's code base from an offset into a pointer into the bytecode buffer,
 * and installs the bytecode decoder entry points on its input handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &m->input_handler_;

    /* code_base holds an offset until now; from here on it is a pointer. */
    m->code_base.ptr = g->bytecode + m->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 m->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, m);

    upb_inttable_next(&it);
  }
}
821
822
823 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
824 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool lazy)825 const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
826 mgroup *g;
827 compiler *c;
828
829 g = newgroup();
830 c = newcompiler(g, lazy);
831 find_methods(c, dest);
832
833 /* We compile in two passes:
834 * 1. all messages are assigned relative offsets from the beginning of the
835 * bytecode (saved in method->code_base).
836 * 2. forwards OP_CALL instructions can be correctly linked since message
837 * offsets have been previously assigned.
838 *
839 * Could avoid the second pass by linking OP_CALL instructions somehow. */
840 compile_methods(c);
841 compile_methods(c);
842 g->bytecode_end = c->pc;
843 freecompiler(c);
844
845 #ifdef UPB_DUMP_BYTECODE
846 {
847 FILE *f = fopen("/tmp/upb-bytecode", "w");
848 UPB_ASSERT(f);
849 dumpbc(g->bytecode, g->bytecode_end, stderr);
850 dumpbc(g->bytecode, g->bytecode_end, f);
851 fclose(f);
852
853 f = fopen("/tmp/upb-bytecode.bin", "wb");
854 UPB_ASSERT(f);
855 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
856 fclose(f);
857 }
858 #endif
859
860 set_bytecode_handlers(g);
861 return g;
862 }
863
864
865 /* upb_pbcodecache ************************************************************/
866
upb_pbcodecache_new(upb_handlercache * dest)867 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
868 upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
869
870 if (!c) return NULL;
871
872 c->dest = dest;
873 c->lazy = false;
874
875 c->arena = upb_arena_new();
876 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
877
878 return c;
879 }
880
/* Destroys a code cache: frees every method group stored in it, then the
 * group table, the arena and the cache object itself. */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);

    /* Groups are stored as const pointers; the cache owns them. */
    freegroup((void*)upb_value_getconstptr(entry));
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
894
/* Sets whether methods compiled by this cache treat lazy fields lazily.  Must
 * be called before the cache holds any compiled group (asserted). */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  UPB_ASSERT(!upb_inttable_count(&c->groups));
  c->lazy = lazy;
}
899
upb_pbcodecache_get(upb_pbcodecache * c,const upb_msgdef * md)900 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
901 const upb_msgdef *md) {
902 upb_value v;
903 bool ok;
904 const upb_handlers *h;
905 const mgroup *g;
906
907 h = upb_handlercache_get(c->dest, md);
908 if (upb_inttable_lookupptr(&c->groups, md, &v)) {
909 g = upb_value_getconstptr(v);
910 } else {
911 g = mgroup_new(h, c->lazy);
912 ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
913 UPB_ASSERT(ok);
914 }
915
916 ok = upb_inttable_lookupptr(&g->methods, h, &v);
917 UPB_ASSERT(ok);
918 return upb_value_getptr(v);
919 }
920