2 ** protobuf decoder bytecode compiler
4 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
5 ** according to that specific schema and destination handlers.
7 ** Bytecode definition is in decoder.int.h.
11 #include "upb/pb/decoder.int.h"
12 #include "upb/pb/varint.int.h"
14 #ifdef UPB_DUMP_BYTECODE
18 #include "upb/port_def.inc"
23 /* upb_pbdecodermethod ********************************************************/
25 static void freemethod(upb_pbdecodermethod *method) {
26 upb_inttable_uninit(&method->dispatch);
30 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
32 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
33 upb_byteshandler_init(&ret->input_handler_);
36 ret->dest_handlers_ = dest_handlers;
37 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
/* Accessor: returns the destination handlers pointer held in the method's
 * dest_handlers_ field (the value newmethod() stores there). */
42 const upb_handlers *upb_pbdecodermethod_desthandlers(
43 const upb_pbdecodermethod *m) {
44 return m->dest_handlers_;
/* Accessor: returns the address of the bytes handler embedded in the method
 * (input_handler_). The pointer refers to storage inside *m, not a copy. */
47 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
48 const upb_pbdecodermethod *m) {
49 return &m->input_handler_;
52 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
57 /* mgroup *********************************************************************/
59 static void freegroup(mgroup *g) {
62 upb_inttable_begin(&i, &g->methods);
63 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
64 freemethod(upb_value_getptr(upb_inttable_iter_value(&i)));
67 upb_inttable_uninit(&g->methods);
68 upb_gfree(g->bytecode);
72 mgroup *newgroup(void) {
73 mgroup *g = upb_gmalloc(sizeof(*g));
74 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
76 g->bytecode_end = NULL;
81 /* bytecode compiler **********************************************************/
83 /* Data used only at compilation time. */
88 int fwd_labels[MAXLABEL];
89 int back_labels[MAXLABEL];
91 /* For fields marked "lazy", parse them lazily or eagerly? */
95 static compiler *newcompiler(mgroup *group, bool lazy) {
96 compiler *ret = upb_gmalloc(sizeof(*ret));
101 for (i = 0; i < MAXLABEL; i++) {
102 ret->fwd_labels[i] = EMPTYLABEL;
103 ret->back_labels[i] = EMPTYLABEL;
108 static void freecompiler(compiler *c) {
112 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
114 /* How many words an instruction is. */
115 static int instruction_len(uint32_t instr) {
116 switch (getop(instr)) {
117 case OP_SETDISPATCH: return 1 + ptr_words;
118 case OP_TAGN: return 3;
119 case OP_SETBIGGROUPNUM: return 2;
124 bool op_has_longofs(int32_t instruction) {
125 switch (getop(instruction)) {
130 /* The "tag" instructions only have 8 bits available for the jump target,
131 * but that is ok because these opcodes only require short jumps. */
/* Extracts the signed jump offset stored in an instruction word.
 *
 * Two encodings are handled, selected by op_has_longofs():
 *  - long form: the offset is everything above the low 8 bits; the word is
 *    cast to int32_t before shifting so a negative offset stays negative
 *    (this relies on the compiler shifting signed values arithmetically).
 *  - short form: the offset is the 8 bits at positions 8..15; the int8_t
 *    cast keeps just those bits and interprets them as a signed value. */
142 static int32_t getofs(uint32_t instruction) {
143 if (op_has_longofs(instruction)) {
144 return (int32_t)instruction >> 8;
146 return (int8_t)(instruction >> 8);
/* Writes jump offset "ofs" into *instruction, in place.
 *
 * For long-offset opcodes the word is rebuilt from getop(*instruction) plus
 * ofs shifted above the low 8 bits, so anything previously stored there is
 * discarded. For all other opcodes only bits 8..15 are replaced (with the low
 * 8 bits of ofs); the remaining bits of the word are preserved.
 *
 * The closing assert reads the offset back through getofs(), which catches an
 * ofs that does not fit in the field available for this opcode. */
150 static void setofs(uint32_t *instruction, int32_t ofs) {
151 if (op_has_longofs(*instruction)) {
152 *instruction = getop(*instruction) | (uint32_t)ofs << 8;
154 *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
156 UPB_ASSERT(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */
/* Returns the compiler's current position (c->pc) as an offset from the start
 * of the group's bytecode buffer. The value is a pointer difference, so it
 * counts bytecode words rather than bytes. */
159 static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
161 /* Defines a local label at the current PC location. All previous forward
162 * references are updated to point to this location. The location is noted
163 * for any future backward references. */
164 static void label(compiler *c, unsigned int label) {
168 UPB_ASSERT(label < MAXLABEL);
169 val = c->fwd_labels[label];
170 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
172 int ofs = getofs(*codep);
173 setofs(codep, c->pc - codep - instruction_len(*codep));
174 codep = ofs ? codep + ofs : NULL;
176 c->fwd_labels[label] = EMPTYLABEL;
177 c->back_labels[label] = pcofs(c);
180 /* Creates a reference to a numbered label; either a forward reference
181 * (positive arg) or backward reference (negative arg). For forward references
182 * the value returned now is actually a "next" pointer into a linked list of all
183 * instructions that use this label and will be patched later when the label is
184 * defined with label().
186 * The returned value is the offset that should be written into the instruction.
188 static int32_t labelref(compiler *c, int label) {
189 UPB_ASSERT(label < MAXLABEL);
190 if (label == LABEL_DISPATCH) {
191 /* No resolving required. */
193 } else if (label < 0) {
194 /* Backward local label. Relative to the next instruction. */
195 uint32_t from = (c->pc + 1) - c->group->bytecode;
196 return c->back_labels[-label] - from;
198 /* Forward local label: prepend to (possibly-empty) linked list. */
199 int *lptr = &c->fwd_labels[label];
200 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
/* Emits one 32-bit word of bytecode at the compiler's current position,
 * growing the group's bytecode buffer first when pc has reached bytecode_end.
 *
 * Growth policy: the capacity (counted in uint32_t words) is doubled, with a
 * minimum of 64 words. After upb_grealloc() the buffer may have moved, so
 * both bytecode_end and c->pc are recomputed relative to the new base.
 *
 * NOTE(review): "ofs" is declared on a line not visible here; presumably it
 * is the pc offset captured before the reallocation -- confirm. The store of
 * "v" itself is also outside the visible lines. */
206 static void put32(compiler *c, uint32_t v) {
207 mgroup *g = c->group;
208 if (c->pc == g->bytecode_end) {
210 size_t oldsize = g->bytecode_end - g->bytecode;
211 size_t newsize = UPB_MAX(oldsize * 2, 64);
212 /* TODO(haberman): handle OOM. */
213 g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t),
214 newsize * sizeof(uint32_t));
215 g->bytecode_end = g->bytecode + newsize;
216 c->pc = g->bytecode + ofs;
221 static void putop(compiler *c, int op, ...) {
226 case OP_SETDISPATCH: {
227 uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
228 put32(c, OP_SETDISPATCH);
230 if (sizeof(uintptr_t) > sizeof(uint32_t))
231 put32(c, (uint64_t)ptr >> 32);
236 case OP_PUSHLENDELIM:
244 case OP_PARSE_DOUBLE:
247 case OP_PARSE_UINT64:
249 case OP_PARSE_FIXED64:
250 case OP_PARSE_FIXED32:
252 case OP_PARSE_UINT32:
253 case OP_PARSE_SFIXED32:
254 case OP_PARSE_SFIXED64:
255 case OP_PARSE_SINT32:
256 case OP_PARSE_SINT64:
264 case OP_PUSHTAGDELIM:
265 put32(c, op | va_arg(ap, upb_selector_t) << 8);
267 case OP_SETBIGGROUPNUM:
269 put32(c, va_arg(ap, int));
272 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
273 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
278 uint32_t instruction = op;
279 int label = va_arg(ap, int);
280 setofs(&instruction, labelref(c, label));
281 put32(c, instruction);
286 int label = va_arg(ap, int);
287 uint64_t tag = va_arg(ap, uint64_t);
288 uint32_t instruction = op | (tag << 16);
289 UPB_ASSERT(tag <= 0xffff);
290 setofs(&instruction, labelref(c, label));
291 put32(c, instruction);
295 int label = va_arg(ap, int);
296 uint64_t tag = va_arg(ap, uint64_t);
297 uint32_t instruction = op | (upb_value_size(tag) << 16);
298 setofs(&instruction, labelref(c, label));
299 put32(c, instruction);
309 #if defined(UPB_DUMP_BYTECODE)
311 const char *upb_pbdecoder_getopname(unsigned int op) {
313 #define EXPAND_AND_QUOTE(x) QUOTE(x)
314 #define OPNAME(x) OP_##x
315 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
316 #define T(x) OP(PARSE_##x)
317 /* Keep in sync with list in decoder.int.h. */
318 switch ((opcode)op) {
319 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
320 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
321 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
322 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
323 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
324 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
325 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
327 return "<unknown op>";
334 #ifdef UPB_DUMP_BYTECODE
336 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
341 fprintf(f, "%p %8tx", p, p - begin);
342 uint32_t instr = *p++;
343 uint8_t op = getop(instr);
344 fprintf(f, " %s", upb_pbdecoder_getopname(op));
345 switch ((opcode)op) {
346 case OP_SETDISPATCH: {
347 const upb_inttable *dispatch;
348 memcpy(&dispatch, p, sizeof(void*));
350 const upb_pbdecodermethod *method =
351 (void *)((char *)dispatch -
352 offsetof(upb_pbdecodermethod, dispatch));
353 fprintf(f, " %s", upb_msgdef_fullname(
354 upb_handlers_msgdef(method->dest_handlers_)));
360 case OP_PUSHLENDELIM:
366 case OP_PARSE_DOUBLE:
369 case OP_PARSE_UINT64:
371 case OP_PARSE_FIXED64:
372 case OP_PARSE_FIXED32:
374 case OP_PARSE_UINT32:
375 case OP_PARSE_SFIXED32:
376 case OP_PARSE_SFIXED64:
377 case OP_PARSE_SINT32:
378 case OP_PARSE_SINT64:
386 case OP_PUSHTAGDELIM:
387 fprintf(f, " %d", instr >> 8);
389 case OP_SETBIGGROUPNUM:
390 fprintf(f, " %d", *p++);
395 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
399 fprintf(f, " tag:0x%x", instr >> 16);
401 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
407 tag |= (uint64_t)*p++ << 32;
408 fprintf(f, " tag:0x%llx", (long long)tag);
409 fprintf(f, " n:%d", instr >> 16);
411 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
/* Computes the encoded tag for field "f" with the given wire type.
 *
 * The raw tag is (field number << 3) | wire_type, which is then passed
 * through upb_vencode32(). The assert checks that the encoded value occupies
 * at most 5 bytes (40 bits).
 *
 * NOTE(review): the return statement is not visible here; presumably
 * encoded_tag is returned, since putchecktag() uses the result as the tag. */
422 static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
423 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
424 uint64_t encoded_tag = upb_vencode32(tag);
425 /* No tag should be greater than 5 bytes. */
426 UPB_ASSERT(encoded_tag <= 0xffffffffff);
430 static void putchecktag(compiler *c, const upb_fielddef *f,
431 int wire_type, int dest) {
432 uint64_t tag = get_encoded_tag(f, wire_type);
433 switch (upb_value_size(tag)) {
435 putop(c, OP_TAG1, dest, tag);
438 putop(c, OP_TAG2, dest, tag);
441 putop(c, OP_TAGN, dest, tag);
446 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
447 upb_selector_t selector;
448 bool ok = upb_handlers_getselector(f, type, &selector);
453 /* Takes an existing, primary dispatch table entry and repacks it with a
454 * different alternate wire type. Called when we are inserting a secondary
455 * dispatch table entry for an alternate wire type. */
456 static uint64_t repack(uint64_t dispatch, int new_wt2) {
460 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
461 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
462 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
465 /* Marks the current bytecode position as the dispatch target for this message,
466 * field, and wire type. */
467 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
468 const upb_fielddef *f, int wire_type) {
469 /* Offset is relative to msg base. */
470 uint64_t ofs = pcofs(c) - method->code_base.ofs;
471 uint32_t fn = upb_fielddef_number(f);
472 upb_inttable *d = &method->dispatch;
474 if (upb_inttable_remove(d, fn, &v)) {
475 /* TODO: prioritize based on packed setting in .proto file. */
476 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
477 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
478 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
480 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
481 upb_inttable_insert(d, fn, upb_value_uint64(val));
485 static void putpush(compiler *c, const upb_fielddef *f) {
486 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
487 putop(c, OP_PUSHLENDELIM);
489 uint32_t fn = upb_fielddef_number(f);
491 putop(c, OP_PUSHTAGDELIM, 0);
492 putop(c, OP_SETBIGGROUPNUM, fn);
494 putop(c, OP_PUSHTAGDELIM, fn);
499 static upb_pbdecodermethod *find_submethod(const compiler *c,
500 const upb_pbdecodermethod *method,
501 const upb_fielddef *f) {
502 const upb_handlers *sub =
503 upb_handlers_getsubhandlers(method->dest_handlers_, f);
505 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
506 ? upb_value_getptr(v)
510 static void putsel(compiler *c, opcode op, upb_selector_t sel,
511 const upb_handlers *h) {
512 if (upb_handlers_gethandler(h, sel, NULL)) {
517 /* Puts an opcode to call a callback, but only if a callback actually exists for
518 * this field and handler type. */
519 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
520 const upb_fielddef *f, upb_handlertype_t type) {
521 putsel(c, op, getsel(f, type), h);
524 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
525 if (!upb_fielddef_lazy(f))
528 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
529 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
530 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
534 /* bytecode compiler code generation ******************************************/
536 /* Symbolic names for our local labels. */
537 #define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
538 #define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
539 #define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
540 #define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
542 /* Generates bytecode to parse a single non-lazy message field. */
543 static void generate_msgfield(compiler *c, const upb_fielddef *f,
544 upb_pbdecodermethod *method) {
545 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
546 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
550 /* Don't emit any code for this field at all; it will be parsed as an
553 * TODO(haberman): we should change this to parse it as a string field
554 * instead. It will probably be faster, but more importantly, once we
555 * start vending unknown fields, a field shouldn't be treated as unknown
556 * just because it doesn't have subhandlers registered. */
560 label(c, LABEL_FIELD);
563 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
564 ? UPB_WIRE_TYPE_DELIMITED
565 : UPB_WIRE_TYPE_START_GROUP;
567 if (upb_fielddef_isseq(f)) {
568 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
569 putchecktag(c, f, wire_type, LABEL_DISPATCH);
570 dispatchtarget(c, method, f, wire_type);
571 putop(c, OP_PUSHTAGDELIM, 0);
572 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
573 label(c, LABEL_LOOPSTART);
575 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
576 putop(c, OP_CALL, sub_m);
578 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
579 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
580 putop(c, OP_SETDELIM);
582 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
583 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
584 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
585 label(c, LABEL_LOOPBREAK);
587 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
589 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
590 putchecktag(c, f, wire_type, LABEL_DISPATCH);
591 dispatchtarget(c, method, f, wire_type);
593 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
594 putop(c, OP_CALL, sub_m);
596 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
597 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
598 putop(c, OP_SETDELIM);
603 /* Generates bytecode to parse a single string or lazy submessage field. */
604 static void generate_delimfield(compiler *c, const upb_fielddef *f,
605 upb_pbdecodermethod *method) {
606 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
608 label(c, LABEL_FIELD);
609 if (upb_fielddef_isseq(f)) {
610 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
611 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
612 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
613 putop(c, OP_PUSHTAGDELIM, 0);
614 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
615 label(c, LABEL_LOOPSTART);
616 putop(c, OP_PUSHLENDELIM);
617 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
618 /* Need to emit even if no handler to skip past the string. */
619 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
620 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
622 putop(c, OP_SETDELIM);
623 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
624 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
625 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
626 label(c, LABEL_LOOPBREAK);
628 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
630 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
631 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
632 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
633 putop(c, OP_PUSHLENDELIM);
634 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
635 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
636 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
638 putop(c, OP_SETDELIM);
642 /* Generates bytecode to parse a single primitive field. */
643 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
644 upb_pbdecodermethod *method) {
645 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
646 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
651 label(c, LABEL_FIELD);
653 /* From a decoding perspective, ENUM is the same as INT32. */
654 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
655 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
657 parse_type = (opcode)descriptor_type;
659 /* TODO(haberman): generate packed or non-packed first depending on "packed"
660 * setting in the fielddef. This will favor (in speed) whichever was
663 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
664 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
665 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
666 if (upb_fielddef_isseq(f)) {
667 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
668 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
669 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
670 putop(c, OP_PUSHLENDELIM);
671 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
672 label(c, LABEL_LOOPSTART);
673 putop(c, parse_type, sel);
674 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
675 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
676 dispatchtarget(c, method, f, wire_type);
677 putop(c, OP_PUSHTAGDELIM, 0);
678 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
679 label(c, LABEL_LOOPSTART);
680 putop(c, parse_type, sel);
681 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
682 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
683 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
684 label(c, LABEL_LOOPBREAK);
685 putop(c, OP_POP); /* Packed and non-packed join. */
686 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
687 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
689 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
690 putchecktag(c, f, wire_type, LABEL_DISPATCH);
691 dispatchtarget(c, method, f, wire_type);
692 putop(c, parse_type, sel);
696 /* Appends bytecode for parsing the given method's message at the compiler's pc,
697 * while adding all dispatch targets to this message's dispatch table. */
698 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
699 const upb_handlers *h;
700 const upb_msgdef *md;
702 upb_msg_field_iter i;
707 /* Clear all entries in the dispatch table. */
708 upb_inttable_uninit(&method->dispatch);
709 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
711 h = upb_pbdecodermethod_desthandlers(method);
712 md = upb_handlers_msgdef(h);
714 method->code_base.ofs = pcofs(c);
715 putop(c, OP_SETDISPATCH, &method->dispatch);
716 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
717 label(c, LABEL_FIELD);
719 for(upb_msg_field_begin(&i, md);
720 !upb_msg_field_done(&i);
721 upb_msg_field_next(&i)) {
722 const upb_fielddef *f = upb_msg_iter_field(&i);
723 upb_fieldtype_t type = upb_fielddef_type(f);
725 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
726 generate_msgfield(c, f, method);
727 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
728 type == UPB_TYPE_MESSAGE) {
729 generate_delimfield(c, f, method);
731 generate_primitivefield(c, f, method);
735 /* If there were no fields, or if no handlers were defined, we need to
736 * generate a non-empty loop body so that we can at least dispatch for unknown
737 * fields and check for the end of the message. */
738 if (c->pc == start_pc) {
739 /* Check for end-of-message. */
740 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
741 /* Unconditionally dispatch. */
742 putop(c, OP_DISPATCH, 0);
745 /* For now we just loop back to the last field of the message (or if none,
746 * the DISPATCH opcode for the message). */
747 putop(c, OP_BRANCH, -LABEL_FIELD);
749 /* Insert both a label and a dispatch table entry for this end-of-msg. */
750 label(c, LABEL_ENDMSG);
751 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
752 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
754 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
757 upb_inttable_compact(&method->dispatch);
760 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
761 * Returns nothing; each new method is inserted into c->group->methods, keyed by its handlers.
763 * Generates a new method for every set of destination handlers reachable from "h". */
764 static void find_methods(compiler *c, const upb_handlers *h) {
766 upb_msg_field_iter i;
767 const upb_msgdef *md;
768 upb_pbdecodermethod *method;
770 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
773 method = newmethod(h, c->group);
774 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
776 /* Find submethods. */
777 md = upb_handlers_msgdef(h);
778 for(upb_msg_field_begin(&i, md);
779 !upb_msg_field_done(&i);
780 upb_msg_field_next(&i)) {
781 const upb_fielddef *f = upb_msg_iter_field(&i);
782 const upb_handlers *sub_h;
783 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
784 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
785 /* We only generate a decoder method for submessages with handlers.
786 * Others will be parsed as unknown fields. */
787 find_methods(c, sub_h);
792 /* (Re-)compile bytecode for every method in c->group->methods.
793 * Restarts at the beginning of the group's bytecode, overwriting what is already there. */
794 static void compile_methods(compiler *c) {
797 /* Start over at the beginning of the bytecode. */
798 c->pc = c->group->bytecode;
800 upb_inttable_begin(&i, &c->group->methods);
801 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
802 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
803 compile_method(c, method);
/* Finalizes every method in g->methods once the bytecode has been generated.
 *
 * For each method:
 *  - code_base is converted from an offset (code_base.ofs) into a pointer
 *    into g->bytecode (code_base.ptr);
 *  - the method's input_handler_ gets the bytecode decoder callbacks:
 *    startstr -> upb_pbdecoder_startbc (closure: the method's code pointer),
 *    string   -> upb_pbdecoder_decode  (closure: the group),
 *    endstr   -> upb_pbdecoder_end     (closure: the method). */
807 static void set_bytecode_handlers(mgroup *g) {
809 upb_inttable_begin(&i, &g->methods);
810 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
811 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
812 upb_byteshandler *h = &m->input_handler_;
814 m->code_base.ptr = g->bytecode + m->code_base.ofs;
816 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
817 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
818 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
823 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
824 * handlers and other mgroups (but verify we have a transitive closure). */
825 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy) {
829 UPB_UNUSED(allowjit);
832 c = newcompiler(g, lazy);
833 find_methods(c, dest);
835 /* We compile in two passes:
836 * 1. all messages are assigned relative offsets from the beginning of the
837 * bytecode (saved in method->code_base).
838 * 2. forward OP_CALL instructions can be correctly linked since message
839 * offsets have been previously assigned.
841 * Could avoid the second pass by linking OP_CALL instructions somehow. */
844 g->bytecode_end = c->pc;
847 #ifdef UPB_DUMP_BYTECODE
849 FILE *f = fopen("/tmp/upb-bytecode", "w");
851 dumpbc(g->bytecode, g->bytecode_end, stderr);
852 dumpbc(g->bytecode, g->bytecode_end, f);
855 f = fopen("/tmp/upb-bytecode.bin", "wb");
857 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
862 set_bytecode_handlers(g);
867 /* upb_pbcodecache ************************************************************/
869 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
870 upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
878 c->arena = upb_arena_new();
879 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
/* Destroys a code cache and what it holds.
 *
 * Each group stored in c->groups is looked up by the integer keys
 * 0 .. count-1 and released with freegroup(); the (void*) cast drops the
 * const qualifier because the table stores the groups as const pointers.
 * Afterwards the groups table is uninitialized and the cache's arena freed.
 *
 * NOTE(review): the release of "c" itself is not visible in these lines --
 * confirm it happens after the arena is freed. */
884 void upb_pbcodecache_free(upb_pbcodecache *c) {
887 for (i = 0; i < upb_inttable_count(&c->groups); i++) {
889 bool ok = upb_inttable_lookup(&c->groups, i, &v);
891 freegroup((void*)upb_value_getconstptr(v));
894 upb_inttable_uninit(&c->groups);
895 upb_arena_free(c->arena);
899 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
903 void upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
904 UPB_ASSERT(upb_inttable_count(&c->groups) == 0);
905 c->allow_jit = allow;
908 void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
909 UPB_ASSERT(upb_inttable_count(&c->groups) == 0);
913 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
914 const upb_msgdef *md) {
917 const upb_handlers *h;
920 /* Right now we build a new DecoderMethod every time.
921 * TODO(haberman): properly cache methods by their true key. */
922 h = upb_handlercache_get(c->dest, md);
923 g = mgroup_new(h, c->allow_jit, c->lazy);
924 upb_inttable_push(&c->groups, upb_value_constptr(g));
926 ok = upb_inttable_lookupptr(&g->methods, h, &v);
928 return upb_value_getptr(v);