4 ** This header is INTERNAL-ONLY! Its interfaces are not public or stable!
5 ** This file defines very fast int->upb_value (inttable) and string->upb_value
6 ** (strtable) hash tables.
8 ** The table uses chained scatter with Brent's variation (inspired by the Lua
9 ** implementation of hash tables). The hash function for strings is Austin
10 ** Appleby's "MurmurHash."
12 ** The inttable uses uintptr_t as its key, which guarantees it can be used to
13 ** store pointers or integers of at least 32 bits (upb isn't really useful on
14 ** systems where sizeof(void*) < 4).
16 ** The table must be homogeneous (all values of the same type). In debug
17 ** mode, we check this on insert and lookup.
27 #include "upb/port_def.inc"
34 /* upb_value ******************************************************************/
36 /* A tagged union (stored untagged inside the table) so that we can check that
37 * clients calling table accessors are correctly typed without having to have
38 * an explosion of accessors. */
47 UPB_CTYPE_CONSTPTR = 8,
56 /* In debug mode we carry the value type around also so we can check accesses
57 * to be sure the right member is being read. */
/* SET_TYPE(dest, val) is how the accessors below record a value's ctype tag.
 * Two alternative definitions appear here; presumably a preprocessor
 * conditional outside this view selects exactly one of them (TODO confirm). */
/* Tag-less variant: expands to UPB_UNUSED(val) and never touches dest. */
63 #define SET_TYPE(dest, val) UPB_UNUSED(val)
/* Tagging variant: a plain assignment of val to dest. */
65 #define SET_TYPE(dest, val) dest = val
68 /* Like strdup(), which isn't always available since it's not ANSI C. */
69 char *upb_strdup(const char *s, upb_alloc *a);
70 /* Variant that works with a length-delimited rather than NUL-terminated string,
71 * as supported by strtable. */
72 char *upb_strdup2(const char *s, size_t len, upb_alloc *a);
74 UPB_INLINE char *upb_gstrdup(const char *s) {
75 return upb_strdup(s, &upb_alloc_global);
78 UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val,
81 SET_TYPE(v->ctype, ctype);
84 UPB_INLINE upb_value _upb_value_val(uint64_t val, upb_ctype_t ctype) {
86 _upb_value_setval(&ret, val, ctype);
90 /* For each value ctype, define the following set of functions:
92 * // Get/set an int32 from a upb_value.
93 * int32_t upb_value_getint32(upb_value val);
94 * void upb_value_setint32(upb_value *val, int32_t cval);
96 * // Construct a new upb_value from an int32.
97 * upb_value upb_value_int32(int32_t val); */
/* Macro parameters, as used by the part of the expansion visible here:
 *   name       - suffix pasted onto upb_value_set, upb_value_ and
 *                upb_value_get to form the three function names.
 *   membername - not referenced in the lines visible here.
 *   type_t     - C type accepted by the setter/constructor and returned by the
 *                getter.
 *   converter  - intermediate type: the setter casts cval to it before storing
 *                into val->val, and the getter casts val.val to it before
 *                casting to type_t.
 *   proto_type - tag handed to SET_TYPE by the setter and compared against
 *                val.ctype by the getter through UPB_ASSERT_DEBUGVAR. */
98 #define FUNCS(name, membername, type_t, converter, proto_type) \
99 UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
100 val->val = (converter)cval; \
101 SET_TYPE(val->ctype, proto_type); \
103 UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
105 upb_value_set ## name(&ret, val); \
108 UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
109 UPB_ASSERT_DEBUGVAR(val.ctype == proto_type); \
110 return (type_t)(converter)val.val; \
/* One accessor set per supported value type. Integer and bool values are
 * converted through their own type; the pointer kinds (cstr, ptr, constptr,
 * fptr) are converted through uintptr_t.
 * NOTE(review): fptr casts upb_func* (presumably a function-pointer type) to
 * and from uintptr_t. ISO C does not guarantee that function pointers survive
 * a round-trip through an integer type -- confirm this holds on every
 * supported target. */
113 FUNCS(int32, int32, int32_t, int32_t, UPB_CTYPE_INT32)
114 FUNCS(int64, int64, int64_t, int64_t, UPB_CTYPE_INT64)
115 FUNCS(uint32, uint32, uint32_t, uint32_t, UPB_CTYPE_UINT32)
116 FUNCS(uint64, uint64, uint64_t, uint64_t, UPB_CTYPE_UINT64)
117 FUNCS(bool, _bool, bool, bool, UPB_CTYPE_BOOL)
118 FUNCS(cstr, cstr, char*, uintptr_t, UPB_CTYPE_CSTR)
119 FUNCS(ptr, ptr, void*, uintptr_t, UPB_CTYPE_PTR)
120 FUNCS(constptr, constptr, const void*, uintptr_t, UPB_CTYPE_CONSTPTR)
121 FUNCS(fptr, fptr, upb_func*, uintptr_t, UPB_CTYPE_FPTR)
/* Stores a float in *val by copying its object representation with memcpy
 * (no numeric conversion) and tagging the value as UPB_CTYPE_FLOAT via
 * SET_TYPE.
 * NOTE(review): only sizeof(cval) bytes of val->val are written. val->val is
 * handled as a uint64_t elsewhere in this header, so if float is narrower the
 * remaining bytes keep whatever they held before -- confirm nothing compares
 * or hashes the raw val of a float-typed value. */
125 UPB_INLINE void upb_value_setfloat(upb_value *val, float cval) {
126 memcpy(&val->val, &cval, sizeof(cval));
127 SET_TYPE(val->ctype, UPB_CTYPE_FLOAT);
/* Stores a double in *val by copying sizeof(double) bytes of its object
 * representation into val->val with memcpy (no numeric conversion), then tags
 * the value as UPB_CTYPE_DOUBLE via SET_TYPE. */
130 UPB_INLINE void upb_value_setdouble(upb_value *val, double cval) {
131 memcpy(&val->val, &cval, sizeof(cval));
132 SET_TYPE(val->ctype, UPB_CTYPE_DOUBLE);
135 UPB_INLINE upb_value upb_value_float(float cval) {
137 upb_value_setfloat(&ret, cval);
141 UPB_INLINE upb_value upb_value_double(double cval) {
143 upb_value_setdouble(&ret, cval);
150 /* upb_tabkey *****************************************************************/
153 * 1. an actual integer key, or
154 * 2. a pointer to a string prefixed by its uint32_t length, owned by us.
156 * ...depending on whether this is a string table or an int table. We would
157 * make this a union of those two types, but C89 doesn't support statically
158 * initializing a non-first union member. */
159 typedef uintptr_t upb_tabkey;
/* Decodes a string-table key. |key| is reinterpreted as a pointer to a buffer
 * that begins with a uint32_t length and is followed immediately by the string
 * bytes. If |len| is non-NULL, the length prefix is copied into *len.
 * Returns a pointer to the first byte after the length prefix. */
161 UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
162 char* mem = (char*)key;
/* memcpy copies the prefix bytewise, so this read does not depend on |mem|
 * being suitably aligned for uint32_t. */
163 if (len) memcpy(len, mem, sizeof(*len));
164 return mem + sizeof(*len);
168 /* upb_tabval *****************************************************************/
174 #define UPB_TABVALUE_EMPTY_INIT {-1}
177 /* upb_table ******************************************************************/
179 typedef struct _upb_tabent {
183 /* Internal chaining. This is const so we can create static initializers for
184 * tables. We cast away const sometimes, but *only* when the containing
185 * upb_table is known to be non-const. This requires a bit of care, but
186 * the subtlety is confined to table.c. */
187 const struct _upb_tabent *next;
191 size_t count; /* Number of entries in the hash part. */
192 size_t mask; /* Mask to turn hash value -> bucket. */
193 upb_ctype_t ctype; /* Type of all values. */
194 uint8_t size_lg2; /* Size of the hashtable part is 2^size_lg2 entries. */
196 /* Hash table entries.
197 * Making this const isn't entirely accurate; what we really want is for it to
198 * have the same const-ness as the table it's inside. But there's no way to
199 * declare that in C. So we have to make it const so that we can statically
200 * initialize const hash tables. Then we cast away const when we have to.
202 const upb_tabent *entries;
205 /* This table's allocator. We make the user pass it in to every relevant
206 * function and only use this to check it in debug mode. We do this solely
207 * to keep upb_table as small as possible. This might seem slightly paranoid
208 * but the plan is to use upb_table for all map fields and extension sets in
209 * a forthcoming message representation, so there could be a lot of these.
210 * If this turns out to be too annoying later, we can change it (since this
211 * is an internal-only header file). */
221 upb_table t; /* For entries that don't fit in the array part. */
222 const upb_tabval *array; /* Array part of the table. See const note above. */
223 size_t array_size; /* Array part size. */
224 size_t array_count; /* Array part number of elements. */
/* Static initializer for a upb_inttable: a nested UPB_TABLE_INIT(...) for the
 * hash part, followed by a, asize and acount. The upb_inttable fields visible
 * in this header are declared in the order t, array, array_size, array_count,
 * so the positional initializers line up with those fields. */
227 #define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
228 {UPB_TABLE_INIT(count, mask, ctype, size_lg2, ent), a, asize, acount}
/* Initializer for an inttable of the given ctype with count, mask, size_lg2,
 * array size and array count all 0, and both the entries pointer and the array
 * pointer NULL. */
230 #define UPB_EMPTY_INTTABLE_INIT(ctype) \
231 UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)
/* NOTE(review): presumably the marker for an unused array slot (upb_arrhas()
 * tests against (uint64_t)-1), but no use of this macro is visible here.
 * The expansion is a bare -1; writing it as (-1) would keep it safe in any
 * expression context. */
233 #define UPB_ARRAY_EMPTYENT -1
/* Computes the size of the hash part from t->size_lg2, which the struct
 * documents as "2^size_lg2 entries".
 * NOTE(review): the literal 1 has type int, so the shift below is evaluated in
 * int and only then converted to size_t. A size_lg2 at or above the width of
 * int would be undefined behavior, and size_lg2 is a uint8_t, so the type does
 * not rule that out. Confirm callers keep size_lg2 small, or shift (size_t)1
 * instead. */
235 UPB_INLINE size_t upb_table_size(const upb_table *t) {
236 if (t->size_lg2 == 0)
239 return 1 << t->size_lg2;
242 /* Internal-only functions, in .h file only out of necessity. */
243 UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
247 /* Used by some of the unit tests for generic hashing functionality. */
248 uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed);
250 UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
/* Hash of an integer key: the key itself converted to uint32_t. On targets
 * where uintptr_t is wider than 32 bits, only the low 32 bits survive the
 * conversion. */
254 UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
255 return (uint32_t)key;
/* Returns the bucket for |hash|: the element of t->entries at index
 * (hash & t->mask). t->entries is not checked for NULL here.
 * NOTE(review): this helper is declared plain `static` while the neighbouring
 * helpers use UPB_INLINE -- confirm that is intentional. */
258 static const upb_tabent *upb_getentry(const upb_table *t, uint32_t hash) {
259 return t->entries + (hash & t->mask);
/* Reports whether key.val differs from (uint64_t)-1 (all bits set).
 * Presumably that value marks an empty array slot (compare
 * UPB_TABVALUE_EMPTY_INIT, which is {-1}) -- TODO confirm. Despite its name,
 * the parameter is a upb_tabval, not a key. */
262 UPB_INLINE bool upb_arrhas(upb_tabval key) {
263 return key.val != (uint64_t)-1;
266 /* Initialize and uninitialize a table, respectively. If memory allocation
267 * fails, false is returned and the table is left uninitialized. */
268 bool upb_inttable_init2(upb_inttable *table, upb_ctype_t ctype, upb_alloc *a);
269 bool upb_strtable_init2(upb_strtable *table, upb_ctype_t ctype, upb_alloc *a);
270 void upb_inttable_uninit2(upb_inttable *table, upb_alloc *a);
271 void upb_strtable_uninit2(upb_strtable *table, upb_alloc *a);
273 UPB_INLINE bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype) {
274 return upb_inttable_init2(table, ctype, &upb_alloc_global);
277 UPB_INLINE bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype) {
278 return upb_strtable_init2(table, ctype, &upb_alloc_global);
281 UPB_INLINE void upb_inttable_uninit(upb_inttable *table) {
282 upb_inttable_uninit2(table, &upb_alloc_global);
285 UPB_INLINE void upb_strtable_uninit(upb_strtable *table) {
286 upb_strtable_uninit2(table, &upb_alloc_global);
289 /* Returns the number of values in the table. */
290 size_t upb_inttable_count(const upb_inttable *t);
291 UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
295 void upb_inttable_packedsize(const upb_inttable *t, size_t *size);
296 void upb_strtable_packedsize(const upb_strtable *t, size_t *size);
297 upb_inttable *upb_inttable_pack(const upb_inttable *t, void *p, size_t *ofs,
299 upb_strtable *upb_strtable_pack(const upb_strtable *t, void *p, size_t *ofs,
302 /* Inserts the given key into the hashtable with the given value. The key must
303 * not already exist in the hash table. For string tables, the key must be
304 * NUL-terminated, and the table will make an internal copy of the key.
305 * Inttables must not insert a value of UINTPTR_MAX.
307 * If a table resize was required but memory allocation failed, false is
308 * returned and the table is unchanged. */
309 bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
311 bool upb_strtable_insert3(upb_strtable *t, const char *key, size_t len,
312 upb_value val, upb_alloc *a);
314 UPB_INLINE bool upb_inttable_insert(upb_inttable *t, uintptr_t key,
316 return upb_inttable_insert2(t, key, val, &upb_alloc_global);
319 UPB_INLINE bool upb_strtable_insert2(upb_strtable *t, const char *key,
320 size_t len, upb_value val) {
321 return upb_strtable_insert3(t, key, len, val, &upb_alloc_global);
324 /* For NUL-terminated strings. */
325 UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
327 return upb_strtable_insert2(t, key, strlen(key), val);
330 /* Looks up key in this table, returning "true" if the key was found.
331 * If v is non-NULL, copies the value for this key into *v. */
332 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
333 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
336 /* For NUL-terminated strings. */
337 UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
339 return upb_strtable_lookup2(t, key, strlen(key), v);
342 /* Removes an item from the table. Returns true if the remove was successful,
343 * and stores the removed item in *val if non-NULL. */
344 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
345 bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
346 upb_value *val, upb_alloc *alloc);
348 UPB_INLINE bool upb_strtable_remove2(upb_strtable *t, const char *key,
349 size_t len, upb_value *val) {
350 return upb_strtable_remove3(t, key, len, val, &upb_alloc_global);
353 /* For NUL-terminated strings. */
354 UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
356 return upb_strtable_remove2(t, key, strlen(key), v);
359 /* Updates an existing entry in an inttable. If the entry does not exist,
360 * returns false and does nothing. Unlike insert/remove, this does not
361 * invalidate iterators. */
362 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
364 /* Handy routines for treating an inttable like a stack. May not be mixed with
365 * other insert/remove calls. */
366 bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a);
367 upb_value upb_inttable_pop(upb_inttable *t);
369 UPB_INLINE bool upb_inttable_push(upb_inttable *t, upb_value val) {
370 return upb_inttable_push2(t, val, &upb_alloc_global);
373 /* Convenience routines for inttables with pointer keys. */
374 bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
376 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
377 bool upb_inttable_lookupptr(
378 const upb_inttable *t, const void *key, upb_value *val);
380 UPB_INLINE bool upb_inttable_insertptr(upb_inttable *t, const void *key,
382 return upb_inttable_insertptr2(t, key, val, &upb_alloc_global);
385 /* Optimizes the table for the current set of entries, for both memory use and
386 * lookup time. Client should call this after all entries have been inserted;
387 * inserting more entries is legal, but will likely require a table resize. */
388 void upb_inttable_compact2(upb_inttable *t, upb_alloc *a);
390 UPB_INLINE void upb_inttable_compact(upb_inttable *t) {
391 upb_inttable_compact2(t, &upb_alloc_global);
394 /* A special-case inlinable version of the lookup routine for 32-bit
396 UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
398 *v = upb_value_int32(0); /* Silence compiler warnings. */
399 if (key < t->array_size) {
400 upb_tabval arrval = t->array[key];
401 if (upb_arrhas(arrval)) {
402 _upb_value_setval(v, arrval.val, t->t.ctype);
409 if (t->t.entries == NULL) return false;
410 for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
411 if ((uint32_t)e->key == key) {
412 _upb_value_setval(v, e->val.val, t->t.ctype);
415 if (e->next == NULL) return false;
420 /* Exposed for testing only. */
421 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a);
423 /* Iterators ******************************************************************/
425 /* Iterators for int and string tables. We are subject to some unusual
426 * design constraints:
428 * For high-level languages:
429 * - we must be able to guarantee that we don't crash or corrupt memory even if
430 * the program accesses an invalidated iterator.
432 * For C++11 range-based for:
433 * - iterators must be copyable
434 * - iterators must be comparable
435 * - it must be possible to construct an "end" value.
437 * Iteration order is undefined.
439 * Modifying the table invalidates iterators. upb_{str,int}table_done() is
440 * guaranteed to work even on an invalidated iterator, as long as the table it
441 * is iterating over has not been freed. Calling next() or accessing data from
442 * an invalidated iterator yields unspecified elements from the table, but it is
443 * guaranteed not to crash and to return real table elements (except when done()
447 /* upb_strtable_iter **********************************************************/
449 /* upb_strtable_iter i;
450 * upb_strtable_begin(&i, t);
451 * for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
452 * const char *key = upb_strtable_iter_key(&i);
453 * const upb_value val = upb_strtable_iter_value(&i);
459 const upb_strtable *t;
463 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
464 void upb_strtable_next(upb_strtable_iter *i);
465 bool upb_strtable_done(const upb_strtable_iter *i);
466 const char *upb_strtable_iter_key(const upb_strtable_iter *i);
467 size_t upb_strtable_iter_keylength(const upb_strtable_iter *i);
468 upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
469 void upb_strtable_iter_setdone(upb_strtable_iter *i);
470 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
471 const upb_strtable_iter *i2);
474 /* upb_inttable_iter **********************************************************/
476 /* upb_inttable_iter i;
477 * upb_inttable_begin(&i, t);
478 * for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
479 * uintptr_t key = upb_inttable_iter_key(&i);
480 * upb_value val = upb_inttable_iter_value(&i);
486 const upb_inttable *t;
491 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
492 void upb_inttable_next(upb_inttable_iter *i);
493 bool upb_inttable_done(const upb_inttable_iter *i);
494 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
495 upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
496 void upb_inttable_iter_setdone(upb_inttable_iter *i);
497 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
498 const upb_inttable_iter *i2);
505 #include "upb/port_undef.inc"
507 #endif /* UPB_TABLE_H_ */