2 ** Internal-only definitions for the decoder.
5 #ifndef UPB_DECODER_INT_H_
6 #define UPB_DECODER_INT_H_
9 #include "upb/handlers.h"
10 #include "upb/pb/decoder.h"
12 #include "upb/table.int.h"
14 #include "upb/port_def.inc"
16 /* Opcode definitions. The canonical meaning of each opcode is its
17 * implementation in the interpreter (the JIT is written to match this).
19 * All instructions have the opcode in the low byte.
20 * Instruction format for most instructions is:
22 * +-------------------+--------+
23 * | arg (24) | op (8) |
24 * +-------------------+--------+
26 * Exceptions are indicated below. A few opcodes are multi-word. */
28 /* Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
29 * Arg for all of these is the upb selector for this field. */
30 #define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type
31 T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
32 T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
34 OP_STARTMSG = 9, /* No arg. */
35 OP_ENDMSG = 10, /* No arg. */
44 OP_PUSHTAGDELIM = 23, /* No arg. */
45 OP_PUSHLENDELIM = 24, /* No arg. */
46 OP_POP = 25, /* No arg. */
47 OP_SETDELIM = 26, /* No arg. */
48 OP_SETBIGGROUPNUM = 27, /* two words:
49 * | unused (24) | opc (8) |
50 * | groupnum (32) | */
56 /* Different opcodes depending on how many bytes expected. */
57 OP_TAG1 = 32, /* | match tag (16) | jump target (8) | opc (8) | */
58 OP_TAG2 = 33, /* | match tag (16) | jump target (8) | opc (8) | */
59 OP_TAGN = 34, /* three words: */
60 /* | unused (16) | jump target(8) | opc (8) | */
61 /* | match tag 1 (32) | */
62 /* | match tag 2 (32) | */
64 OP_SETDISPATCH = 35, /* N words: */
65 /* | unused (24) | opc | */
66 /* | upb_inttable* (32 or 64) | */
68 OP_DISPATCH = 36, /* No arg. */
70 OP_HALT = 37 /* No arg. */
73 #define OP_MAX OP_HALT
75 UPB_INLINE opcode getop(uint32_t instr) { return (opcode)(instr & 0xff); }
77 struct upb_pbcodecache {
79 upb_handlercache *dest;
83 /* Array of mgroups. */
87 /* Method group; represents a set of decoder methods that had their code
88 * emitted together. Immutable once created. */
90 /* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod. Owned by us.
92 * Ideally this would be on pbcodecache (if we were actually caching code).
93 * Right now we don't actually cache anything, which is wasteful. */
96 /* The bytecode for our methods, if any exists. Owned by us. */
98 uint32_t *bytecode_end;
100 #ifdef UPB_USE_JIT_X64
101 /* JIT-generated machine code, if any. */
102 upb_string_handlerfunc *jit_code;
103 /* The size of the jit_code (required to munmap()). */
110 /* The maximum that any submessages can be nested. Matches proto2's limit.
111 * This specifies the size of the decoder's statically-sized array and therefore
112 * setting it high will cause the upb::pb::Decoder object to be larger.
114 * If necessary we can add a runtime-settable property to Decoder that allows
115 * this to be larger than the compile-time setting, but this would add
116 * complexity, particularly since we would have to decide how/if to give users
117 * the ability to set a custom memory allocation function. */
118 #define UPB_DECODER_MAX_NESTING 64
120 /* Internal-only struct used by the decoder. */
122 /* Space optimization note: we store two pointers here that the JIT
123 * doesn't need at all; the upb_handlers* inside the sink and
124 * the dispatch table pointer. We can optimize so that the JIT uses
125 * smaller stack frames than the interpreter. The only thing we need
126 * to guarantee is that the fallback routines can find end_ofs. */
129 /* The absolute stream offset of the end-of-frame delimiter.
130 * Non-delimited frames (groups and non-packed repeated fields) reuse the
131 * delimiter of their parent, even though the frame may not end there.
133 * NOTE: the JIT stores a slightly different value here for non-top frames.
134 * It stores the value relative to the end of the enclosed message. But the
135 * top frame is still stored the same way, which is important for ensuring
136 * that calls from the JIT into C work correctly. */
138 const uint32_t *base;
140 /* 0 indicates a length-delimited field.
141 * A positive number indicates a known group.
142 * A negative number indicates an unknown group. */
144 upb_inttable *dispatch; /* Not used by the JIT. */
145 } upb_pbdecoder_frame;
147 struct upb_pbdecodermethod {
148 /* While compiling, the base is relative in "ofs", after compiling it is
149 * absolute in "ptr". */
151 uint32_t ofs; /* PC offset of method. */
152 void *ptr; /* Pointer to bytecode or machine code for this method. */
155 /* The decoder method group to which this method belongs. */
158 /* Whether this method is native code or bytecode. */
161 /* The handler one calls to invoke this method. */
162 upb_byteshandler input_handler_;
164 /* The destination handlers this method is bound to. We own a ref. */
165 const upb_handlers *dest_handlers_;
167 /* Dispatch table -- used by both bytecode decoder and JIT when encountering a
168 * field number that wasn't the one we were expecting to see. See the
169 * dispatch table layout comment near upb_pbdecoder_packdispatch() below. */
170 upb_inttable dispatch;
173 struct upb_pbdecoder {
176 /* Our input sink. */
177 upb_bytessink input_;
179 /* The decoder method we are parsing with (owned). */
180 const upb_pbdecodermethod *method_;
183 const uint32_t *pc, *last;
185 /* Current input buffer and its stream offset. */
186 const char *buf, *ptr, *end, *checkpoint;
188 /* End of the delimited region, relative to ptr, NULL if not in this buf. */
189 const char *delim_end;
191 /* End of the delimited region, relative to ptr, end if not in this buf. */
192 const char *data_end;
194 /* Overall stream offset of "buf." */
195 uint64_t bufstart_ofs;
197 /* Buffer for residual bytes not parsed from the previous buffer. */
198 char residual[UPB_DECODER_MAX_RESIDUAL_BYTES];
201 /* Bytes of data that should be discarded from the input before we start
202 * parsing again. We set this when we internally determine that we can
203 * safely skip the next N bytes, but this region extends past the current buffer. */
207 /* Stores the user buffer passed to our decode function. */
208 const char *buf_param;
210 const upb_bufhandle *handle;
212 /* Our internal stack. */
213 upb_pbdecoder_frame *stack, *top, *limit;
214 const uint32_t **callstack;
219 #ifdef UPB_USE_JIT_X64
220 /* Used momentarily by the generated code to store a value while a user
221 * function is called. */
224 const void *saved_rsp;
228 /* Decoder entry points; used as handlers. */
229 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
230 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
231 size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
232 size_t size, const upb_bufhandle *handle);
233 bool upb_pbdecoder_end(void *closure, const void *handler_data);
235 /* Decoder-internal functions that the JIT calls to handle fallback paths. */
236 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
237 size_t size, const upb_bufhandle *handle);
238 size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
239 int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
241 int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected);
242 int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64);
243 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
244 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
245 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
247 /* Error messages that are shared between the bytecode and JIT decoders. */
248 extern const char *kPbDecoderStackOverflow;
249 extern const char *kPbDecoderSubmessageTooLong;
251 /* Access to decoderplan members needed by the decoder. */
252 const char *upb_pbdecoder_getopname(unsigned int op);
254 /* JIT codegen entry point. */
255 void upb_pbdecoder_jit(mgroup *group);
256 void upb_pbdecoder_freejit(mgroup *group);
258 /* A special label that means "do field dispatch for this message and branch to
259 * wherever that takes you." */
260 #define LABEL_DISPATCH 0
262 /* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
263 * RET) for branching to when we find an appropriate ENDGROUP tag. */
264 #define DISPATCH_ENDMSG 0
266 /* It's important to use this invalid wire type instead of 0 (which is a valid wire type). */
268 #define NO_WIRE_TYPE 0xff
270 /* The dispatch table layout is:
271 * [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
273 * If wt1 matches, jump to the 48-bit offset. If wt2 matches, lookup
274 * (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
276 * We need two wire types because of packed/non-packed compatibility. A
277 * primitive repeated field can use either wire type and be valid. While we
278 * could key the table on fieldnum+wiretype, the table would be 8x sparser.
280 * Storing two wire types in the primary value allows us to quickly rule out
281 * the second wire type without needing to do a separate lookup (this case is
282 * less common than an unknown field). */
/* Builds a packed dispatch value: "ofs" shifted left by 16 bits, OR'd with
 * "wt2" shifted left by 8 bits and with "wt1" in the low byte. */
283 UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
285 return (ofs << 16) | (wt2 << 8) | wt1;
/* Splits a packed dispatch value into its parts: *wt1 receives the low byte,
 * *wt2 receives the next byte, and *ofs receives the remaining upper bits
 * (dispatch >> 16). */
288 UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
289 uint8_t *wt1, uint8_t *wt2) {
290 *wt1 = (uint8_t)dispatch;
291 *wt2 = (uint8_t)(dispatch >> 8);
292 *ofs = dispatch >> 16;
295 /* All of the functions in decoder.c that return int32_t return values according
296 * to the following scheme:
297 * 1. negative values indicate a return code from the following list.
298 * 2. positive values indicate that error or end of buffer was hit, and
299 * that the decode function should immediately return the given value
300 * (the decoder state has already been suspended and is ready to be resumed). */
/* Negative return codes.  Each replacement list is parenthesized so that the
 * constant remains a single operand wherever the macro is expanded. */
#define DECODE_MISMATCH (-2)  /* Used only from checktag_slow(). */
#define DECODE_ENDGROUP (-3)  /* Used only from checkunknown(). */
/* Evaluates x exactly once as an int32_t and, if the result is non-negative,
 * immediately returns it from the enclosing function; a negative result falls
 * through so the caller continues.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement: write it with a trailing semicolon, e.g. CHECK_RETURN(f(d));
 * which keeps it safe inside an unbraced if/else.  The temporary has a
 * macro-specific name so that an argument mentioning a caller variable named
 * "ret" is not captured by the macro's own local. */
#define CHECK_RETURN(x)                                   \
  do {                                                    \
    int32_t check_return_val_ = (x);                      \
    if (check_return_val_ >= 0) return check_return_val_; \
  } while (0)
308 #include "upb/port_undef.inc"
310 #endif /* UPB_DECODER_INT_H_ */