2 ** upb::Decoder (Bytecode Decoder VM)
4 ** Bytecode must previously have been generated using the bytecode compiler in
5 ** compile_decoder.c. This decoder then walks through the bytecode op-by-op to
8 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
9 ** instruction and resume from there. A fair amount of the logic here is to
10 ** handle the fact that values can span buffer seams and we have to be
11 ** capable of suspending/resuming from any byte in the stream. This
12 ** sometimes requires keeping a few trailing bytes from the last buffer around
13 ** in the "residual" buffer.
18 #include "upb/pb/decoder.int.h"
19 #include "upb/pb/varint.int.h"
21 #ifdef UPB_DUMP_BYTECODE
25 #include "upb/port_def.inc"
/* Bails out of the enclosing function with the result of
 * upb_pbdecoder_suspend(d) when condition x is false.  Expects a variable
 * named "d" to be in scope at the expansion site.  The body is wrapped in
 * do/while(0) so that "CHECK_SUSPEND(x);" forms exactly one statement and
 * composes safely with if/else. */
#define CHECK_SUSPEND(x) \
  do { \
    if (!(x)) return upb_pbdecoder_suspend(d); \
  } while (0)
29 /* Error messages that are shared between the bytecode and JIT decoders. */
30 const char *kPbDecoderStackOverflow = "Nesting too deep.";
31 const char *kPbDecoderSubmessageTooLong =
32 "Submessage end extends past enclosing submessage.";
34 /* Error messages shared within this file. */
35 static const char *kUnterminatedVarint = "Unterminated varint.";
37 /* upb_pbdecoder **************************************************************/
39 static opcode halt = OP_HALT;
41 /* A dummy character we can point to when the user passes us a NULL buffer.
42 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
43 * behavior, which would invalidate functions like curbufleft(). */
44 static const char dummy_char;
46 /* Whether an op consumes any of the input buffer. */
47 static bool consumes_input(opcode op) {
61 case OP_SETBIGGROUPNUM:
72 static size_t stacksize(upb_pbdecoder *d, size_t entries) {
74 return entries * sizeof(upb_pbdecoder_frame);
77 static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
80 #ifdef UPB_USE_JIT_X64
81 if (d->method_->is_native_) {
82 /* Each native stack frame needs two pointers, plus we need a few frames for
83 * the enter/exit trampolines. */
84 size_t ret = entries * sizeof(void*) * 2;
85 ret += sizeof(void*) * 10;
90 return entries * sizeof(uint32_t*);
94 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
96 /* It's unfortunate that we have to micro-manage the compiler with
97 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
98 * specific to one hardware configuration. But empirically on a Core i7,
99 * performance increases 30-50% with these annotations. Every instance where
100 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
103 static void seterr(upb_pbdecoder *d, const char *msg) {
104 upb_status_seterrmsg(d->status, msg);
107 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
112 /* Buffering ******************************************************************/
114 /* We operate on one buffer at a time, which is either the user's buffer passed
115 * to our "decode" callback or some residual bytes from the previous buffer. */
117 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
118 * or past the current delimited end. */
119 static size_t curbufleft(const upb_pbdecoder *d) {
/* d->data_end is the read limit for the current buffer; d->ptr must not have
 * moved past it, otherwise the unsigned result below would wrap around. */
120 UPB_ASSERT(d->data_end >= d->ptr);
121 return d->data_end - d->ptr;
124 /* How many bytes are available before end-of-buffer. */
/* Unlike curbufleft(), this measures against d->end rather than d->data_end,
 * and it does not assert that d->ptr is within bounds. */
125 static size_t bufleft(const upb_pbdecoder *d) {
126 return d->end - d->ptr;
129 /* Overall stream offset of d->ptr. */
/* Computed as the stream offset of the start of the current buffer
 * (d->bufstart_ofs) plus the distance of d->ptr from d->buf. */
130 uint64_t offset(const upb_pbdecoder *d) {
131 return d->bufstart_ofs + (d->ptr - d->buf);
134 /* How many bytes are available before the end of this delimited region. */
/* The region end is the top frame's end_ofs, which is compared against stream
 * offsets elsewhere in this file, so the current stream offset is subtracted
 * from it.
 * NOTE(review): offset() returns uint64_t while this function returns size_t,
 * so the difference may be narrowed where size_t is 32 bits -- confirm that
 * callers tolerate this. */
135 size_t delim_remaining(const upb_pbdecoder *d) {
136 return d->top->end_ofs - offset(d);
139 /* Advances d->ptr. */
140 static void advance(upb_pbdecoder *d, size_t len) {
141 UPB_ASSERT(curbufleft(d) >= len);
/* Reports whether p lies in the range [buf, end].  The upper bound is
 * inclusive, so a pointer equal to "end" is treated as inside the buffer. */
145 static bool in_buf(const char *p, const char *buf, const char *end) {
146 return p >= buf && p <= end;
/* Reports whether p points into the decoder's residual buffer, i.e. within
 * [d->residual, d->residual_end].  Both bounds are inclusive because the test
 * is delegated to in_buf(). */
149 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
150 return in_buf(p, d->residual, d->residual_end);
153 /* Calculates the delim_end value, which is affected by both the current buffer
154 * and the parsing stack, so must be called whenever either is updated. */
155 static void set_delim_end(upb_pbdecoder *d) {
156 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
157 if (delim_ofs <= (size_t)(d->end - d->buf)) {
158 d->delim_end = d->buf + delim_ofs;
159 d->data_end = d->delim_end;
161 d->data_end = d->end;
166 static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
/* Moves the decoder from the current buffer, which must be fully consumed,
 * to the buffer [buf, buf + len). */
173 static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
/* Only legal once nothing readable is left in the current buffer. */
174 UPB_ASSERT(curbufleft(d) == 0);
/* Account for the whole length of the buffer being left behind, so that
 * bufstart_ofs becomes the stream offset at which the new buffer starts. */
175 d->bufstart_ofs += (d->end - d->buf);
176 switchtobuf(d, buf, buf + len);
/* Records the current read position (d->ptr) as the checkpoint.  The suspend
 * paths in this file refer back to d->checkpoint when they rewind d->ptr or
 * work out how many bytes were consumed. */
179 static void checkpoint(upb_pbdecoder *d) {
180 /* The assertion here is in the interests of efficiency, not correctness.
181 * We are trying to ensure that we don't checkpoint() more often than
183 UPB_ASSERT(d->checkpoint != d->ptr);
184 d->checkpoint = d->ptr;
187 /* Skips "bytes" bytes in the stream, which may be more than available. If we
188 * skip more bytes than are available, we return a long read count to the caller
189 * indicating how many bytes can be skipped over before passing actual data
190 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they
191 * won't actually be read.
193 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
194 UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
195 UPB_ASSERT(d->skip == 0);
196 if (bytes > delim_remaining(d)) {
197 seterr(d, "Skipped value extended beyond enclosing submessage.");
198 return upb_pbdecoder_suspend(d);
199 } else if (bufleft(d) >= bytes) {
200 /* Skipped data is all in current buffer, and more is still available. */
205 /* Skipped data extends beyond currently available buffers. */
207 d->skip = bytes - curbufleft(d);
208 d->bufstart_ofs += (d->end - d->buf);
209 d->residual_end = d->residual;
210 switchtobuf(d, d->residual, d->residual_end);
211 return d->size_param + d->skip;
216 /* Resumes the decoder from an initial state or from a previous suspend. */
217 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
218 size_t size, const upb_bufhandle *handle) {
219 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */
221 /* d->skip and d->residual_end could probably elegantly be represented
222 * as a single variable, to more easily represent this invariant. */
223 UPB_ASSERT(!(d->skip && d->residual_end > d->residual));
225 /* We need to remember the original size_param, so that the value we return
226 * is relative to it, even if we do some skipping first. */
227 d->size_param = size;
230 /* Have to handle this case specially (ie. not with skip()) because the user
231 * is allowed to pass a NULL buffer here, which won't allow us to safely
232 * calculate a d->end or use our normal functions like curbufleft(). */
233 if (d->skip && d->skip >= size) {
235 d->bufstart_ofs += size;
239 /* We can't just return now, because we might need to execute some ops
240 * like CHECKDELIM, which could call some callbacks and pop the stack. */
243 /* We need to pretend that this was the actual buffer param, since some of the
244 * calculations assume that d->ptr/d->buf is relative to this. */
248 /* NULL buf is ok if its entire span is covered by the "skip" above, but
249 * by this point we know that "skip" doesn't cover the buffer. */
250 seterr(d, "Passed NULL buffer over non-skippable region.");
251 return upb_pbdecoder_suspend(d);
254 if (d->residual_end > d->residual) {
255 /* We have residual bytes from the last buffer. */
256 UPB_ASSERT(d->ptr == d->residual);
258 switchtobuf(d, buf, buf + size);
261 d->checkpoint = d->ptr;
263 /* Handle skips that don't cover the whole buffer (as above). */
265 size_t skip_bytes = d->skip;
267 CHECK_RETURN(skip(d, skip_bytes));
271 /* If we're inside an unknown group, continue to parse unknown values. */
272 if (d->top->groupnum < 0) {
273 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
280 /* Suspends the decoder at the last checkpoint, without saving any residual
281 * bytes. If there are any unconsumed bytes, returns a short byte count. */
282 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
284 if (d->checkpoint == d->residual) {
285 /* Checkpoint was in residual buf; no user bytes were consumed. */
286 d->ptr = d->residual;
289 size_t ret = d->size_param - (d->end - d->checkpoint);
290 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
291 UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
293 d->bufstart_ofs += (d->checkpoint - d->buf);
294 d->residual_end = d->residual;
295 switchtobuf(d, d->residual, d->residual_end);
300 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
301 * bytes in our residual buffer. This is necessary if we need more user
302 * bytes to form a complete value, which might not be contiguous in the
303 * user's buffers. Always consumes all user bytes. */
304 static size_t suspend_save(upb_pbdecoder *d) {
305 /* We hit end-of-buffer before we could parse a full value.
306 * Save any unconsumed bytes (if any) to the residual buffer. */
309 if (d->checkpoint == d->residual) {
310 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
311 UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
312 sizeof(d->residual));
313 if (!in_residual_buf(d, d->ptr)) {
314 d->bufstart_ofs -= (d->residual_end - d->residual);
316 memcpy(d->residual_end, d->buf_param, d->size_param);
317 d->residual_end += d->size_param;
319 /* Checkpoint was in user buf; old residual bytes not needed. */
321 UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
323 d->ptr = d->checkpoint;
324 save = curbufleft(d);
325 UPB_ASSERT(save <= sizeof(d->residual));
326 memcpy(d->residual, d->ptr, save);
327 d->residual_end = d->residual + save;
328 d->bufstart_ofs = offset(d);
331 switchtobuf(d, d->residual, d->residual_end);
332 return d->size_param;
335 /* Copies the next "bytes" bytes into "buf" and advances the stream.
336 * Requires that this many bytes are available in the current buffer. */
337 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
339 UPB_ASSERT(bytes <= curbufleft(d));
340 memcpy(buf, d->ptr, bytes);
344 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
345 * available in the current buffer or not. Returns a status code as described
346 * in decoder.int.h. */
347 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
349 const size_t avail = curbufleft(d);
350 consumebytes(d, buf, avail);
352 UPB_ASSERT(bytes > 0);
353 if (in_residual_buf(d, d->ptr)) {
354 advancetobuf(d, d->buf_param, d->size_param);
356 if (curbufleft(d) >= bytes) {
357 consumebytes(d, (char *)buf + avail, bytes);
359 } else if (d->data_end == d->delim_end) {
360 seterr(d, "Submessage ended in the middle of a value or group");
361 return upb_pbdecoder_suspend(d);
363 return suspend_save(d);
367 /* Gets the next "bytes" bytes, regardless of whether they are available in the
368 * current buffer or not. Returns a status code as described in decoder.int.h.
370 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
372 if (curbufleft(d) >= bytes) {
373 /* Buffer has enough data to satisfy. */
374 consumebytes(d, buf, bytes);
377 return getbytes_slow(d, buf, bytes);
381 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
383 size_t ret = curbufleft(d);
384 memcpy(buf, d->ptr, ret);
385 if (in_residual_buf(d, d->ptr)) {
386 size_t copy = UPB_MIN(bytes - ret, d->size_param);
387 memcpy((char *)buf + ret, d->buf_param, copy);
393 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
395 if (curbufleft(d) >= bytes) {
396 memcpy(buf, d->ptr, bytes);
399 return peekbytes_slow(d, buf, bytes);
404 /* Decoding of wire types *****************************************************/
406 /* Slow path for decoding a varint from the current buffer position.
407 * Returns a status code as described in decoder.int.h. */
408 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
413 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
414 CHECK_RETURN(getbytes(d, &byte, 1));
415 *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
417 if(bitpos == 70 && (byte & 0x80)) {
418 seterr(d, kUnterminatedVarint);
419 return upb_pbdecoder_suspend(d);
424 /* Decodes a varint from the current buffer position.
425 * Returns a status code as described in decoder.int.h. */
426 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
427 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
431 } else if (curbufleft(d) >= 10) {
433 upb_decoderet r = upb_vdecode_fast(d->ptr);
435 seterr(d, kUnterminatedVarint);
436 return upb_pbdecoder_suspend(d);
438 advance(d, r.p - d->ptr);
442 /* Slow case -- varint spans buffer seam. */
443 return upb_pbdecoder_decode_varint_slow(d, u64);
447 /* Decodes a 32-bit varint from the current buffer position.
448 * Returns a status code as described in decoder.int.h. */
449 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
451 int32_t ret = decode_varint(d, &u64);
452 if (ret >= 0) return ret;
453 if (u64 > UINT32_MAX) {
454 seterr(d, "Unterminated 32-bit varint");
455 /* TODO(haberman) guarantee that this function's return value is >= 0 somehow,
456 * so we know this path will always be treated as error by our caller.
457 * Right now the size_t -> int32_t can overflow and produce negative values.
460 return upb_pbdecoder_suspend(d);
466 /* Decodes a fixed32 from the current buffer position.
467 * Returns a status code as described in decoder.int.h.
468 * TODO: proper byte swapping for big-endian machines. */
469 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
470 return getbytes(d, u32, 4);
473 /* Decodes a fixed64 from the current buffer position.
474 * Returns a status code as described in decoder.int.h.
475 * TODO: proper byte swapping for big-endian machines. */
476 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
477 return getbytes(d, u64, 8);
480 /* Non-static versions of the above functions.
481 * These are called by the JIT for fallback paths. */
482 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
483 return decode_fixed32(d, u32);
486 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
487 return decode_fixed64(d, u64);
/* Reinterprets the bits of a 64-bit integer as a double.  memcpy is used
 * rather than a pointer cast so the conversion does not run afoul of aliasing
 * rules; the copy length is taken from the destination object instead of
 * being hard-coded. */
static double as_double(uint64_t n) {
  double d;
  memcpy(&d, &n, sizeof(d));
  return d;
}
/* Reinterprets the bits of a 32-bit integer as a float.  memcpy is used
 * rather than a pointer cast so the conversion does not run afoul of aliasing
 * rules; the copy length is taken from the destination object instead of
 * being hard-coded. */
static float as_float(uint32_t n) {
  float f;
  memcpy(&f, &n, sizeof(f));
  return f;
}
493 /* Pushes a frame onto the decoder stack. */
494 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
495 upb_pbdecoder_frame *fr = d->top;
497 if (end > fr->end_ofs) {
498 seterr(d, kPbDecoderSubmessageTooLong);
500 } else if (fr == d->limit) {
501 seterr(d, kPbDecoderStackOverflow);
513 static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
514 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
515 * field number) prior to hitting any enclosing submessage end, pushing our
516 * existing delim end prevents us from continuing to parse values from a
517 * corrupt proto that doesn't give us an END tag in time. */
518 if (!decoder_push(d, d->top->end_ofs))
520 d->top->groupnum = arg;
524 /* Pops a frame from the decoder stack. */
525 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
527 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
530 size_t bytes = upb_value_size(expected);
531 size_t read = peekbytes(d, &data, bytes);
532 if (read == bytes && data == expected) {
533 /* Advance past matched bytes. */
534 int32_t ok = getbytes(d, &data, read);
537 } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
538 return suspend_save(d);
540 return DECODE_MISMATCH;
544 int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
551 CHECK_RETURN(decode_v32(d, &tag));
552 wire_type = tag & 0x7;
557 seterr(d, "Saw invalid field number (0)");
558 return upb_pbdecoder_suspend(d);
562 case UPB_WIRE_TYPE_32BIT:
563 CHECK_RETURN(skip(d, 4));
565 case UPB_WIRE_TYPE_64BIT:
566 CHECK_RETURN(skip(d, 8));
568 case UPB_WIRE_TYPE_VARINT: {
570 CHECK_RETURN(decode_varint(d, &u64));
573 case UPB_WIRE_TYPE_DELIMITED: {
575 CHECK_RETURN(decode_v32(d, &len));
576 CHECK_RETURN(skip(d, len));
579 case UPB_WIRE_TYPE_START_GROUP:
580 CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
582 case UPB_WIRE_TYPE_END_GROUP:
583 if (fieldnum == -d->top->groupnum) {
585 } else if (fieldnum == d->top->groupnum) {
586 return DECODE_ENDGROUP;
588 seterr(d, "Unmatched ENDGROUP tag.");
589 return upb_pbdecoder_suspend(d);
593 seterr(d, "Invalid wire type");
594 return upb_pbdecoder_suspend(d);
597 if (d->top->groupnum >= 0) {
598 /* TODO: More code needed for handling unknown groups. */
599 upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
603 /* Unknown group -- continue looping over unknown fields. */
608 static void goto_endmsg(upb_pbdecoder *d) {
610 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
612 d->pc = d->top->base + upb_value_getuint64(v);
615 /* Parses a tag and jumps to the corresponding bytecode instruction for this
618 * If the tag is unknown (or the wire type doesn't match), parses the field as
619 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode
620 * instruction for the end of message. */
621 static int32_t dispatch(upb_pbdecoder *d) {
622 upb_inttable *dispatch = d->top->dispatch;
630 CHECK_RETURN(decode_v32(d, &tag));
631 wire_type = tag & 0x7;
634 /* Lookup tag. Because of packed/non-packed compatibility, we have to
635 * check the wire type against two possibilities. */
636 if (fieldnum != DISPATCH_ENDMSG &&
637 upb_inttable_lookup32(dispatch, fieldnum, &val)) {
638 uint64_t v = upb_value_getuint64(val);
639 if (wire_type == (v & 0xff)) {
640 d->pc = d->top->base + (v >> 16);
642 } else if (wire_type == ((v >> 8) & 0xff)) {
644 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
646 d->pc = d->top->base + upb_value_getuint64(val);
651 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG
652 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
653 * we need to back up to, so that when we're done skipping unknown data we
654 * can re-check the delimited end. */
655 d->last--; /* Necessary if we get suspended */
657 UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
659 /* Unknown field or ENDGROUP. */
660 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
662 CHECK_RETURN(retval);
664 if (retval == DECODE_ENDGROUP) {
672 /* Callers know that the stack is more than one deep because the opcodes that
673 * call this only occur after PUSH operations. */
674 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
675 UPB_ASSERT(d->top != d->stack);
680 /* The main decoding loop *****************************************************/
682 /* The main decoder VM function. Uses traditional bytecode dispatch loop with a
683 * switch() statement. */
684 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
685 const upb_bufhandle* handle) {
687 #define VMCASE(op, code) \
688 case op: { code; if (consumes_input(op)) checkpoint(d); break; }
689 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
690 VMCASE(OP_PARSE_ ## type, { \
692 CHECK_RETURN(decode_ ## wt(d, &val)); \
693 upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \
703 instruction = *d->pc++;
704 op = getop(instruction);
705 arg = instruction >> 8;
707 UPB_ASSERT(d->ptr != d->residual_end);
709 #ifdef UPB_DUMP_BYTECODE
710 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
713 (int)(d->ptr - d->buf),
714 (int)(d->data_end - d->ptr),
715 (int)(d->end - d->ptr),
716 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
717 (int)(d->pc - 1 - group->bytecode),
718 upb_pbdecoder_getopname(op),
722 /* Technically, we are losing data if we see a 32-bit varint that is not
723 * properly sign-extended. We could detect this and error about the data
724 * loss, but proto2 does not do this, so we pass. */
725 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t)
726 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t)
727 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t)
728 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t)
729 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t)
730 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t)
731 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t)
732 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t)
733 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t)
734 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t)
735 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t)
736 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t)
737 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t)
739 VMCASE(OP_SETDISPATCH,
740 d->top->base = d->pc - 1;
741 memcpy(&d->top->dispatch, d->pc, sizeof(void*));
742 d->pc += sizeof(void*) / sizeof(uint32_t);
745 CHECK_SUSPEND(upb_sink_startmsg(d->top->sink));
748 CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status));
751 upb_pbdecoder_frame *outer = outer_frame(d);
752 CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink));
755 CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg));
757 VMCASE(OP_STARTSUBMSG,
758 upb_pbdecoder_frame *outer = outer_frame(d);
759 CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink));
762 CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, arg));
765 uint32_t len = delim_remaining(d);
766 upb_pbdecoder_frame *outer = outer_frame(d);
767 CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink));
769 d->pc++; /* Skip OP_STRING. */
773 uint32_t len = curbufleft(d);
774 size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle);
776 if (n > delim_remaining(d)) {
777 seterr(d, "Tried to skip past end of string.");
778 return upb_pbdecoder_suspend(d);
780 int32_t ret = skip(d, n);
781 /* This shouldn't return DECODE_OK, because n > len. */
782 UPB_ASSERT(ret >= 0);
787 if (n < len || d->delim_end == NULL) {
788 /* We aren't finished with this string yet. */
789 d->pc--; /* Repeat OP_STRING. */
790 if (n > 0) checkpoint(d);
791 return upb_pbdecoder_suspend(d);
795 CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg));
797 VMCASE(OP_PUSHTAGDELIM,
798 CHECK_SUSPEND(pushtagdelim(d, arg));
800 VMCASE(OP_SETBIGGROUPNUM,
801 d->top->groupnum = *d->pc++;
804 UPB_ASSERT(d->top > d->stack);
807 VMCASE(OP_PUSHLENDELIM,
809 CHECK_RETURN(decode_v32(d, &len));
810 CHECK_SUSPEND(decoder_push(d, offset(d) + len));
816 VMCASE(OP_CHECKDELIM,
817 /* This assertion is guaranteed to hold because we never allow ourselves to
818 * consume bytes beyond data_end, which covers delim_end when non-NULL.
820 UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
821 if (d->ptr == d->delim_end)
825 d->callstack[d->call_len++] = d->pc;
829 UPB_ASSERT(d->call_len > 0);
830 d->pc = d->callstack[--d->call_len];
837 CHECK_SUSPEND(curbufleft(d) > 0);
838 expected = (arg >> 8) & 0xff;
839 if (*d->ptr == expected) {
845 if (shortofs == LABEL_DISPATCH) {
846 CHECK_RETURN(dispatch(d));
849 break; /* Avoid checkpoint(). */
855 CHECK_SUSPEND(curbufleft(d) > 0);
856 expected = (arg >> 8) & 0xffff;
857 if (curbufleft(d) >= 2) {
859 memcpy(&actual, d->ptr, 2);
860 if (expected == actual) {
866 int32_t result = upb_pbdecoder_checktag_slow(d, expected);
867 if (result == DECODE_MISMATCH) goto badtag;
868 if (result >= 0) return result;
874 memcpy(&expected, d->pc, 8);
876 result = upb_pbdecoder_checktag_slow(d, expected);
877 if (result == DECODE_MISMATCH) goto badtag;
878 if (result >= 0) return result;
880 VMCASE(OP_DISPATCH, {
881 CHECK_RETURN(dispatch(d));
884 return d->size_param;
891 /* BytesHandler handlers ******************************************************/
893 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
894 upb_pbdecoder *d = closure;
895 UPB_UNUSED(size_hint);
896 d->top->end_ofs = UINT64_MAX;
899 d->callstack[0] = &halt;
905 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
906 upb_pbdecoder *d = closure;
908 UPB_UNUSED(size_hint);
909 d->top->end_ofs = UINT64_MAX;
916 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
917 upb_pbdecoder *d = closure;
918 const upb_pbdecodermethod *method = handler_data;
922 if (d->residual_end > d->residual) {
923 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
928 seterr(d, "Unexpected EOF inside skipped data");
932 if (d->top->end_ofs != UINT64_MAX) {
933 seterr(d, "Unexpected EOF inside delimited string");
937 /* The user's end() call indicates that the message ends here. */
939 d->top->end_ofs = end;
941 #ifdef UPB_USE_JIT_X64
942 if (method->is_native_) {
943 const mgroup *group = (const mgroup*)method->group;
944 if (d->top != d->stack)
945 d->stack->end_ofs = 0;
946 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
950 const uint32_t *p = d->pc;
951 d->stack->end_ofs = end;
952 /* Check the previous bytecode, but guard against beginning. */
953 if (p != method->code_base.ptr) p--;
954 if (getop(*p) == OP_CHECKDELIM) {
955 /* Rewind from OP_TAG* to OP_CHECKDELIM. */
956 UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
957 getop(*d->pc) == OP_TAG2 ||
958 getop(*d->pc) == OP_TAGN ||
959 getop(*d->pc) == OP_DISPATCH);
962 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
965 if (d->call_len != 0) {
966 seterr(d, "Unexpected EOF inside submessage or group");
973 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
974 size_t size, const upb_bufhandle *handle) {
975 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
977 if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
978 CHECK_RETURN(result);
980 return run_decoder_vm(decoder, group, handle);
984 /* Public API *****************************************************************/
986 void upb_pbdecoder_reset(upb_pbdecoder *d) {
988 d->top->groupnum = 0;
989 d->ptr = d->residual;
990 d->buf = d->residual;
991 d->end = d->residual;
992 d->residual_end = d->residual;
995 upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m,
996 upb_sink sink, upb_status *status) {
997 const size_t default_max_nesting = 64;
999 size_t size_before = upb_arena_bytesallocated(a);
1002 upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder));
1003 if (!d) return NULL;
1006 d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting));
1007 d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting));
1008 if (!d->stack || !d->callstack) {
1013 d->limit = d->stack + default_max_nesting - 1;
1014 d->stack_size = default_max_nesting;
1017 upb_pbdecoder_reset(d);
1018 upb_bytessink_reset(&d->input_, &m->input_handler_, d);
1020 if (d->method_->dest_handlers_) {
1021 if (sink.handlers != d->method_->dest_handlers_)
1024 d->top->sink = sink;
1026 /* If this fails, increase the value in decoder.h. */
1027 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
1028 UPB_PB_DECODER_SIZE);
1032 uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
1036 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
1040 upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) {
1044 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
1045 return d->stack_size;
1048 bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
1049 UPB_ASSERT(d->top >= d->stack);
1051 if (max < (size_t)(d->top - d->stack)) {
1052 /* Can't set a limit smaller than what we are currently at. */
1056 if (max > d->stack_size) {
1057 /* Need to reallocate stack and callstack to accommodate. */
1058 size_t old_size = stacksize(d, d->stack_size);
1059 size_t new_size = stacksize(d, max);
1060 void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size);
1066 old_size = callstacksize(d, d->stack_size);
1067 new_size = callstacksize(d, max);
1068 p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size);
1074 d->stack_size = max;
1077 d->limit = d->stack + max - 1;