// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// An open-addressing
// hashtable with quadratic probing.
//
// This is a low level hashtable on top of which different interfaces can be
// implemented, like flat_hash_set, node_hash_set, string_hash_set, etc.
//
// The table interface is similar to that of std::unordered_set. Notable
// differences are that most member functions support heterogeneous keys when
// BOTH the hash and eq functions are marked as transparent. They do so by
// providing a typedef called `is_transparent`.
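//
// A hypothetical transparent functor pair, just to illustrate the typedef (the
// names below are ours, not part of this file):
//
//   struct StringHash {
//     using is_transparent = void;
//     size_t operator()(absl::string_view v) const {
//       return absl::Hash<absl::string_view>{}(v);
//     }
//   };
//   struct StringEq {
//     using is_transparent = void;
//     bool operator()(absl::string_view a, absl::string_view b) const {
//       return a == b;
//     }
//   };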
//
// When heterogeneous lookup is enabled, functions that take key_type act as if
// they have an overload set like:
//
//   iterator find(const key_type& key);
//   template <class K>
//   iterator find(const K& key);
//
//   size_type erase(const key_type& key);
//   template <class K>
//   size_type erase(const K& key);
//
//   std::pair<iterator, iterator> equal_range(const key_type& key);
//   template <class K>
//   std::pair<iterator, iterator> equal_range(const K& key);
//
// When heterogeneous lookup is disabled, only the explicit `key_type` overloads
// are present.
//
// find() also supports passing the hash explicitly:
//
//   iterator find(const key_type& key, size_t hash);
//   template <class U>
//   iterator find(const U& key, size_t hash);
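//
// The hash-passing overloads can be used to avoid recomputing a hash the
// caller already has, e.g. (an illustrative sketch, not code from this file):
//
//   size_t h = s.hash_function()(key);
//   if (s.find(key, h) == s.end()) { /* not present */ }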
//
// In addition the pointer to element and iterator stability guarantees are
// weaker: all iterators and pointers are invalidated after a new element is
// inserted.
//
// IMPLEMENTATION DETAILS
//
// The table stores elements inline in a slot array. In addition to the slot
// array the table maintains some control state per slot. The extra state is one
// byte per slot and stores empty or deleted marks, or alternatively 7 bits from
// the hash of an occupied slot. The table is split into logical groups of
// slots, like so:
//
//      Group 1         Group 2        Group 3
// +---------------+---------------+---------------+
// | | | | | | | | | | | | | | | | | | | | | | | | |
// +---------------+---------------+---------------+
//
// On lookup the hash is split into two parts:
// - H2: 7 bits (those stored in the control bytes)
// - H1: the rest of the bits
// The groups are probed using H1. For each group the slots are matched to H2 in
// parallel. Because H2 is 7 bits (128 states) and the number of slots per group
// is low (8 or 16), in almost all cases a match in H2 is also a lookup hit.
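//
// For example (an illustrative walkthrough, not from the original source;
// H1 is additionally mixed with a per-table seed, ignored here):
//
//   hash     = 0x1A2B3C4D5E6F7FD3
//   H2(hash) = hash & 0x7F = 0x53            // stored in the control byte
//   H1(hash) = hash >> 7   = 0x3456789ABCDEFF  // selects the probe sequence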
//
// On insert, once the right group is found (as in lookup), its slots are
// filled in order.
//
// On erase a slot is cleared. In case the group did not have any empty slots
// before the erase, the erased slot is marked as deleted.
//
// Groups without empty slots (but maybe with deleted slots) extend the probe
// sequence. The probing algorithm is quadratic. Given N the number of groups,
// the probing function for the i'th probe is:
//
//   P(0) = H1 % N
//
//   P(i) = (P(i - 1) + i) % N
//
// This probing function guarantees that after N probes, all the groups of the
// table will be probed exactly once.
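//
// For example, with N = 4 groups and H1 % N == 0, the probed groups are (an
// illustrative trace, not from the original source):
//
//   P(0) = 0
//   P(1) = (0 + 1) % 4 = 1
//   P(2) = (1 + 2) % 4 = 3
//   P(3) = (3 + 3) % 4 = 2
//
// i.e. every group is visited exactly once before the sequence repeats.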

#ifndef ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_
#define ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>

#include "absl/base/internal/bits.h"
#include "absl/base/internal/endian.h"
#include "absl/base/port.h"
#include "absl/container/internal/common.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/container/internal/hash_policy_traits.h"
#include "absl/container/internal/hashtable_debug_hooks.h"
#include "absl/container/internal/hashtablez_sampler.h"
#include "absl/container/internal/have_sse.h"
#include "absl/container/internal/layout.h"
#include "absl/memory/memory.h"
#include "absl/meta/type_traits.h"
#include "absl/utility/utility.h"

namespace absl {
namespace container_internal {

template <size_t Width>
class probe_seq {
 public:
  probe_seq(size_t hash, size_t mask) {
    assert(((mask + 1) & mask) == 0 && "not a mask");
    mask_ = mask;
    offset_ = hash & mask_;
  }
  size_t offset() const { return offset_; }
  size_t offset(size_t i) const { return (offset_ + i) & mask_; }

  void next() {
    index_ += Width;
    offset_ += index_;
    offset_ &= mask_;
  }
  // 0-based probe index. The i-th probe in the probe sequence.
  size_t index() const { return index_; }

 private:
  size_t mask_;
  size_t offset_;
  size_t index_ = 0;
};

template <class ContainerKey, class Hash, class Eq>
struct RequireUsableKey {
  template <class PassedKey, class... Args>
  std::pair<
      decltype(std::declval<const Hash&>()(std::declval<const PassedKey&>())),
      decltype(std::declval<const Eq&>()(std::declval<const ContainerKey&>(),
                                         std::declval<const PassedKey&>()))>*
  operator()(const PassedKey&, const Args&...) const;
};

template <class E, class Policy, class Hash, class Eq, class... Ts>
struct IsDecomposable : std::false_type {};

template <class Policy, class Hash, class Eq, class... Ts>
struct IsDecomposable<
    absl::void_t<decltype(
        Policy::apply(RequireUsableKey<typename Policy::key_type, Hash, Eq>(),
                      std::declval<Ts>()...))>,
    Policy, Hash, Eq, Ts...> : std::true_type {};

// TODO(alkis): Switch to std::is_nothrow_swappable when gcc/clang supports it.
template <class T>
constexpr bool IsNoThrowSwappable() {
  using std::swap;
  return noexcept(swap(std::declval<T&>(), std::declval<T&>()));
}

template <typename T>
int TrailingZeros(T x) {
  return sizeof(T) == 8 ? base_internal::CountTrailingZerosNonZero64(
                              static_cast<uint64_t>(x))
                        : base_internal::CountTrailingZerosNonZero32(
                              static_cast<uint32_t>(x));
}

template <typename T>
int LeadingZeros(T x) {
  return sizeof(T) == 8
             ? base_internal::CountLeadingZeros64(static_cast<uint64_t>(x))
             : base_internal::CountLeadingZeros32(static_cast<uint32_t>(x));
}

// An abstraction over a bitmask. It provides an easy way to iterate through the
// indexes of the set bits of a bitmask. When Shift=0 (platforms with SSE),
// this is a true bitmask. On non-SSE platforms, the arithmetic used to
// emulate the SSE behavior works in bytes (Shift=3) and leaves each byte as
// either 0x00 or 0x80.
//
// For example:
//   for (int i : BitMask<uint32_t, 16>(0x5)) -> yields 0, 2
//   for (int i : BitMask<uint64_t, 8, 3>(0x0000000080800000)) -> yields 2, 3
template <class T, int SignificantBits, int Shift = 0>
class BitMask {
  static_assert(std::is_unsigned<T>::value, "");
  static_assert(Shift == 0 || Shift == 3, "");

 public:
  // These are useful for unit tests (gunit).
  using value_type = int;
  using iterator = BitMask;
  using const_iterator = BitMask;

  explicit BitMask(T mask) : mask_(mask) {}
  BitMask& operator++() {
    mask_ &= (mask_ - 1);
    return *this;
  }
  explicit operator bool() const { return mask_ != 0; }
  int operator*() const { return LowestBitSet(); }
  int LowestBitSet() const {
    return container_internal::TrailingZeros(mask_) >> Shift;
  }
  int HighestBitSet() const {
    return (sizeof(T) * CHAR_BIT - container_internal::LeadingZeros(mask_) -
            1) >>
           Shift;
  }

  BitMask begin() const { return *this; }
  BitMask end() const { return BitMask(0); }

  int TrailingZeros() const {
    return container_internal::TrailingZeros(mask_) >> Shift;
  }

  int LeadingZeros() const {
    constexpr int total_significant_bits = SignificantBits << Shift;
    constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits;
    return container_internal::LeadingZeros(mask_ << extra_bits) >> Shift;
  }

 private:
  friend bool operator==(const BitMask& a, const BitMask& b) {
    return a.mask_ == b.mask_;
  }
  friend bool operator!=(const BitMask& a, const BitMask& b) {
    return a.mask_ != b.mask_;
  }

  T mask_;
};

using ctrl_t = signed char;
using h2_t = uint8_t;

// The values here are selected for maximum performance. See the static asserts
// below for details.
enum Ctrl : ctrl_t {
  kEmpty = -128,   // 0b10000000
  kDeleted = -2,   // 0b11111110
  kSentinel = -1,  // 0b11111111
};
static_assert(
    kEmpty & kDeleted & kSentinel & 0x80,
    "Special markers need to have the MSB to make checking for them efficient");
static_assert(kEmpty < kSentinel && kDeleted < kSentinel,
              "kEmpty and kDeleted must be smaller than kSentinel to make the "
              "SIMD test of IsEmptyOrDeleted() efficient");
static_assert(kSentinel == -1,
              "kSentinel must be -1 to elide loading it from memory into SIMD "
              "registers (pcmpeqd xmm, xmm)");
static_assert(kEmpty == -128,
              "kEmpty must be -128 to make the SIMD check for its "
              "existence efficient (psignb xmm, xmm)");
static_assert(~kEmpty & ~kDeleted & kSentinel & 0x7F,
              "kEmpty and kDeleted must share an unset bit that is not shared "
              "by kSentinel to make the scalar test for MatchEmptyOrDeleted() "
              "efficient");
static_assert(kDeleted == -2,
              "kDeleted must be -2 to make the implementation of "
              "ConvertSpecialToEmptyAndFullToDeleted efficient");

// A single block of empty control bytes for tables without any slots allocated.
// This enables removing a branch in the hot path of find().
inline ctrl_t* EmptyGroup() {
  alignas(16) static constexpr ctrl_t empty_group[] = {
      kSentinel, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty,
      kEmpty,    kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty, kEmpty};
  return const_cast<ctrl_t*>(empty_group);
}

// Mixes a randomly generated per-process seed with `hash` and `ctrl` to
// randomize insertion order within groups.
bool ShouldInsertBackwards(size_t hash, ctrl_t* ctrl);

// Returns a hash seed.
//
// The seed consists of the ctrl_ pointer, which adds enough entropy to ensure
// non-determinism of iteration order in most cases.
inline size_t HashSeed(const ctrl_t* ctrl) {
  // The low bits of the pointer have little or no entropy because of
  // alignment. We shift the pointer to try to use higher entropy bits. A
  // good number seems to be 12 bits, because that aligns with page size.
  return reinterpret_cast<uintptr_t>(ctrl) >> 12;
}

inline size_t H1(size_t hash, const ctrl_t* ctrl) {
  return (hash >> 7) ^ HashSeed(ctrl);
}
inline ctrl_t H2(size_t hash) { return hash & 0x7F; }

inline bool IsEmpty(ctrl_t c) { return c == kEmpty; }
inline bool IsFull(ctrl_t c) { return c >= 0; }
inline bool IsDeleted(ctrl_t c) { return c == kDeleted; }
inline bool IsEmptyOrDeleted(ctrl_t c) { return c < kSentinel; }

#if SWISSTABLE_HAVE_SSE2

// https://github.com/abseil/abseil-cpp/issues/209
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87853
// _mm_cmpgt_epi8 is broken under GCC with -funsigned-char.
// Work around this by using the portable implementation of Group
// when using -funsigned-char under GCC.
inline __m128i _mm_cmpgt_epi8_fixed(__m128i a, __m128i b) {
#if defined(__GNUC__) && !defined(__clang__)
  if (std::is_unsigned<char>::value) {
    const __m128i mask = _mm_set1_epi8(0x80);
    const __m128i diff = _mm_subs_epi8(b, a);
    return _mm_cmpeq_epi8(_mm_and_si128(diff, mask), mask);
  }
#endif
  return _mm_cmpgt_epi8(a, b);
}

struct GroupSse2Impl {
  static constexpr size_t kWidth = 16;  // the number of slots per group

  explicit GroupSse2Impl(const ctrl_t* pos) {
    ctrl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
  }

  // Returns a bitmask representing the positions of slots that match hash.
  BitMask<uint32_t, kWidth> Match(h2_t hash) const {
    auto match = _mm_set1_epi8(hash);
    return BitMask<uint32_t, kWidth>(
        _mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl)));
  }

  // Returns a bitmask representing the positions of empty slots.
  BitMask<uint32_t, kWidth> MatchEmpty() const {
#if SWISSTABLE_HAVE_SSSE3
    // This only works because kEmpty is -128.
    return BitMask<uint32_t, kWidth>(
        _mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl)));
#else
    return Match(static_cast<h2_t>(kEmpty));
#endif
  }

  // Returns a bitmask representing the positions of empty or deleted slots.
  BitMask<uint32_t, kWidth> MatchEmptyOrDeleted() const {
    auto special = _mm_set1_epi8(kSentinel);
    return BitMask<uint32_t, kWidth>(
        _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)));
  }

  // Returns the number of trailing empty or deleted elements in the group.
  uint32_t CountLeadingEmptyOrDeleted() const {
    auto special = _mm_set1_epi8(kSentinel);
    return TrailingZeros(
        _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)) + 1);
  }

  void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {
    auto msbs = _mm_set1_epi8(static_cast<char>(-128));
    auto x126 = _mm_set1_epi8(126);
#if SWISSTABLE_HAVE_SSSE3
    auto res = _mm_or_si128(_mm_shuffle_epi8(x126, ctrl), msbs);
#else
    auto zero = _mm_setzero_si128();
    auto special_mask = _mm_cmpgt_epi8_fixed(zero, ctrl);
    auto res = _mm_or_si128(msbs, _mm_andnot_si128(special_mask, x126));
#endif
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), res);
  }

  __m128i ctrl;
};
#endif  // SWISSTABLE_HAVE_SSE2

struct GroupPortableImpl {
  static constexpr size_t kWidth = 8;

  explicit GroupPortableImpl(const ctrl_t* pos)
      : ctrl(little_endian::Load64(pos)) {}

  BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const {
    // For the technique, see:
    // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord
    // (Determine if a word has a byte equal to n).
    //
    // Caveat: there are false positives but:
    // - they only occur if there is a real match
    // - they never occur on kEmpty, kDeleted, kSentinel
    // - they will be handled gracefully by subsequent checks in code
    //
    // Example:
    //   v = 0x1716151413121110
    //   hash = 0x12
    //   retval = (v - lsbs) & ~v & msbs = 0x0000000080800000
    constexpr uint64_t msbs = 0x8080808080808080ULL;
    constexpr uint64_t lsbs = 0x0101010101010101ULL;
    auto x = ctrl ^ (lsbs * hash);
    return BitMask<uint64_t, kWidth, 3>((x - lsbs) & ~x & msbs);
  }

  BitMask<uint64_t, kWidth, 3> MatchEmpty() const {
    constexpr uint64_t msbs = 0x8080808080808080ULL;
    return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) & msbs);
  }

  BitMask<uint64_t, kWidth, 3> MatchEmptyOrDeleted() const {
    constexpr uint64_t msbs = 0x8080808080808080ULL;
    return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 7)) & msbs);
  }

  uint32_t CountLeadingEmptyOrDeleted() const {
    constexpr uint64_t gaps = 0x00FEFEFEFEFEFEFEULL;
    return (TrailingZeros(((~ctrl & (ctrl >> 7)) | gaps) + 1) + 7) >> 3;
  }
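
  // Behavioral example (an illustrative note, not from the original source):
  // if the group's first two control bytes are kEmpty and the third is full,
  // CountLeadingEmptyOrDeleted() returns 2 ("leading" refers to the lowest
  // memory addresses of the group).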

  void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const {
    constexpr uint64_t msbs = 0x8080808080808080ULL;
    constexpr uint64_t lsbs = 0x0101010101010101ULL;
    auto x = ctrl & msbs;
    auto res = (~x + (x >> 7)) & ~lsbs;
    little_endian::Store64(dst, res);
  }

  uint64_t ctrl;
};

#if SWISSTABLE_HAVE_SSE2
using Group = GroupSse2Impl;
#else
using Group = GroupPortableImpl;
#endif
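
// A minimal sketch of how one group is scanned during lookup (illustrative
// only; the real loops live in raw_hash_set::find() below):
//
//   Group g{ctrl + seq.offset()};
//   for (int i : g.Match(H2(hash))) {
//     // slots_[seq.offset(i)] is a candidate; confirm with the eq functor.
//   }
//   if (g.MatchEmpty()) {
//     // An empty slot proves the key is absent; stop probing.
//   }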

template <class Policy, class Hash, class Eq, class Alloc>
class raw_hash_set;

inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; }

// PRECONDITION:
//   IsValidCapacity(capacity)
//   ctrl[capacity] == kSentinel
//   ctrl[i] != kSentinel for all i < capacity
// Applies mapping for every byte in ctrl:
//   DELETED -> EMPTY
//   EMPTY -> EMPTY
//   FULL -> DELETED
inline void ConvertDeletedToEmptyAndFullToDeleted(
    ctrl_t* ctrl, size_t capacity) {
  assert(ctrl[capacity] == kSentinel);
  assert(IsValidCapacity(capacity));
  for (ctrl_t* pos = ctrl; pos != ctrl + capacity + 1; pos += Group::kWidth) {
    Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos);
  }
  // Copy the cloned ctrl bytes.
  std::memcpy(ctrl + capacity + 1, ctrl, Group::kWidth);
  ctrl[capacity] = kSentinel;
}

// Rounds up the capacity to the next power of 2 minus 1, with a minimum of 1.
inline size_t NormalizeCapacity(size_t n) {
  return n ? ~size_t{} >> LeadingZeros(n) : 1;
}
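
// For example (an illustrative sanity check, not from the original source):
//
//   NormalizeCapacity(0)  == 1
//   NormalizeCapacity(1)  == 1
//   NormalizeCapacity(2)  == 3
//   NormalizeCapacity(12) == 15
//   NormalizeCapacity(16) == 31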

// We use 7/8th as maximum load factor.
// For 16-wide groups, that gives an average of two empty slots per group.
inline size_t CapacityToGrowth(size_t capacity) {
  assert(IsValidCapacity(capacity));
  // `capacity*7/8`
  if (Group::kWidth == 8 && capacity == 7) {
    // x-x/8 does not work when x==7.
    return 6;
  }
  return capacity - capacity / 8;
}

// From desired "growth" to a lower bound of the necessary capacity.
// The result might not be a valid capacity and may require NormalizeCapacity().
inline size_t GrowthToLowerboundCapacity(size_t growth) {
  // `growth*8/7`
  if (Group::kWidth == 8 && growth == 7) {
    // x+(x-1)/7 does not work when x==7.
    return 8;
  }
  return growth + static_cast<size_t>((static_cast<int64_t>(growth) - 1) / 7);
}
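
// For example, with 16-wide groups (an illustrative round trip, not from the
// original source):
//
//   CapacityToGrowth(15) == 14            // 15 - 15/8
//   GrowthToLowerboundCapacity(14) == 15  // 14 + (14 - 1)/7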

// Policy: a policy defines how to perform different operations on
// the slots of the hashtable (see hash_policy_traits.h for the full interface
// of policy).
//
// Hash: a (possibly polymorphic) functor that hashes keys of the hashtable. The
// functor should accept a key and return size_t as hash. For best performance
// it is important that the hash function provides high entropy across all bits
// of the hash.
//
// Eq: a (possibly polymorphic) functor that compares two keys for equality. It
// should accept two (of possibly different type) keys and return a bool: true
// if they are equal, false if they are not. If two keys compare equal, then
// their hash values as defined by Hash MUST be equal.
//
// Allocator: an Allocator [https://devdocs.io/cpp/concept/allocator] with which
// the storage of the hashtable will be allocated and the elements will be
// constructed and destroyed.
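//
// A hypothetical minimal policy, only to illustrate the shape of the interface
// (the authoritative contract is in hash_policy_traits.h; `IntPolicy` below is
// our invention, not part of this file):
//
//   struct IntPolicy {
//     using slot_type = int64_t;
//     using key_type = int64_t;
//     using init_type = int64_t;
//
//     static void construct(void*, int64_t* slot, int64_t v) { *slot = v; }
//     static void destroy(void*, int64_t*) {}
//     static void transfer(void*, int64_t* new_slot, int64_t* old_slot) {
//       *new_slot = *old_slot;
//     }
//     static int64_t& element(slot_type* slot) { return *slot; }
//
//     template <class F, class... Args>
//     static decltype(absl::container_internal::DecomposeValue(
//         std::declval<F>(), std::declval<Args>()...))
//     apply(F&& f, Args&&... args) {
//       return absl::container_internal::DecomposeValue(
//           std::forward<F>(f), std::forward<Args>(args)...);
//     }
//   };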
template <class Policy, class Hash, class Eq, class Alloc>
class raw_hash_set {
  using PolicyTraits = hash_policy_traits<Policy>;
  using KeyArgImpl =
      KeyArg<IsTransparent<Eq>::value && IsTransparent<Hash>::value>;

 public:
  using init_type = typename PolicyTraits::init_type;
  using key_type = typename PolicyTraits::key_type;
  // TODO(sbenza): Hide slot_type as it is an implementation detail. Needs user
  // code fixes!
  using slot_type = typename PolicyTraits::slot_type;
  using allocator_type = Alloc;
  using size_type = size_t;
  using difference_type = ptrdiff_t;
  using hasher = Hash;
  using key_equal = Eq;
  using policy_type = Policy;
  using value_type = typename PolicyTraits::value_type;
  using reference = value_type&;
  using const_reference = const value_type&;
  using pointer = typename absl::allocator_traits<
      allocator_type>::template rebind_traits<value_type>::pointer;
  using const_pointer = typename absl::allocator_traits<
      allocator_type>::template rebind_traits<value_type>::const_pointer;

  // Alias used for heterogeneous lookup functions.
  // `key_arg<K>` evaluates to `K` when the functors are transparent and to
  // `key_type` otherwise. It permits template argument deduction on `K` for the
  // transparent case.
  template <class K>
  using key_arg = typename KeyArgImpl::template type<K, key_type>;

 private:
  // Give an early error when key_type is not hashable/eq.
  auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k));
  auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k));

  using Layout = absl::container_internal::Layout<ctrl_t, slot_type>;

  static Layout MakeLayout(size_t capacity) {
    assert(IsValidCapacity(capacity));
    return Layout(capacity + Group::kWidth + 1, capacity);
  }

  using AllocTraits = absl::allocator_traits<allocator_type>;
  using SlotAlloc = typename absl::allocator_traits<
      allocator_type>::template rebind_alloc<slot_type>;
  using SlotAllocTraits = typename absl::allocator_traits<
      allocator_type>::template rebind_traits<slot_type>;

  static_assert(std::is_lvalue_reference<reference>::value,
                "Policy::element() must return a reference");

  template <typename T>
  struct SameAsElementReference
      : std::is_same<typename std::remove_cv<
                         typename std::remove_reference<reference>::type>::type,
                     typename std::remove_cv<
                         typename std::remove_reference<T>::type>::type> {};

  // An enabler for insert(T&&): T must be convertible to init_type or be the
  // same as [cv] value_type [ref].
  // Note: we separate SameAsElementReference into its own type to avoid using
  // reference unless we need to. MSVC doesn't seem to like it in some
  // cases.
  template <class T>
  using RequiresInsertable = typename std::enable_if<
      absl::disjunction<std::is_convertible<T, init_type>,
                        SameAsElementReference<T>>::value,
      int>::type;

  // RequiresNotInit is a workaround for gcc prior to 7.1.
  // See https://godbolt.org/g/Y4xsUh.
  template <class T>
  using RequiresNotInit =
      typename std::enable_if<!std::is_same<T, init_type>::value, int>::type;

  template <class... Ts>
  using IsDecomposable = IsDecomposable<void, PolicyTraits, Hash, Eq, Ts...>;

 public:
  static_assert(std::is_same<pointer, value_type*>::value,
                "Allocators with custom pointer types are not supported");
  static_assert(std::is_same<const_pointer, const value_type*>::value,
                "Allocators with custom pointer types are not supported");

  class iterator {
    friend class raw_hash_set;

   public:
    using iterator_category = std::forward_iterator_tag;
    using value_type = typename raw_hash_set::value_type;
    using reference =
        absl::conditional_t<PolicyTraits::constant_iterators::value,
                            const value_type&, value_type&>;
    using pointer = absl::remove_reference_t<reference>*;
    using difference_type = typename raw_hash_set::difference_type;

    iterator() {}

    // PRECONDITION: not an end() iterator.
    reference operator*() const { return PolicyTraits::element(slot_); }

    // PRECONDITION: not an end() iterator.
    pointer operator->() const { return &operator*(); }

    // PRECONDITION: not an end() iterator.
    iterator& operator++() {
      ++ctrl_;
      ++slot_;
      skip_empty_or_deleted();
      return *this;
    }
    // PRECONDITION: not an end() iterator.
    iterator operator++(int) {
      auto tmp = *this;
      ++*this;
      return tmp;
    }

    friend bool operator==(const iterator& a, const iterator& b) {
      return a.ctrl_ == b.ctrl_;
    }
    friend bool operator!=(const iterator& a, const iterator& b) {
      return !(a == b);
    }

   private:
    iterator(ctrl_t* ctrl) : ctrl_(ctrl) {}  // for end()
    iterator(ctrl_t* ctrl, slot_type* slot) : ctrl_(ctrl), slot_(slot) {}

    void skip_empty_or_deleted() {
      while (IsEmptyOrDeleted(*ctrl_)) {
        // ctrl is not necessarily aligned to Group::kWidth. It is also likely
        // to read past the space for ctrl bytes and into slots. This is ok
        // because ctrl has sizeof() == 1 and slot has sizeof() >= 1 so there
        // is no way to read outside the combined slot array.
        uint32_t shift = Group{ctrl_}.CountLeadingEmptyOrDeleted();
        ctrl_ += shift;
        slot_ += shift;
      }
    }

    ctrl_t* ctrl_ = nullptr;
    // To avoid uninitialized member warnings, put slot_ in an anonymous union.
    // The member is not initialized on singleton and end iterators.
    union {
      slot_type* slot_;
    };
  };

  class const_iterator {
    friend class raw_hash_set;

   public:
    using iterator_category = typename iterator::iterator_category;
    using value_type = typename raw_hash_set::value_type;
    using reference = typename raw_hash_set::const_reference;
    using pointer = typename raw_hash_set::const_pointer;
    using difference_type = typename raw_hash_set::difference_type;

    const_iterator() {}
    // Implicit construction from iterator.
    const_iterator(iterator i) : inner_(std::move(i)) {}

    reference operator*() const { return *inner_; }
    pointer operator->() const { return inner_.operator->(); }

    const_iterator& operator++() {
      ++inner_;
      return *this;
    }
    const_iterator operator++(int) { return inner_++; }

    friend bool operator==(const const_iterator& a, const const_iterator& b) {
      return a.inner_ == b.inner_;
    }
    friend bool operator!=(const const_iterator& a, const const_iterator& b) {
      return !(a == b);
    }

   private:
    const_iterator(const ctrl_t* ctrl, const slot_type* slot)
        : inner_(const_cast<ctrl_t*>(ctrl), const_cast<slot_type*>(slot)) {}

    iterator inner_;
  };

  using node_type = node_handle<Policy, hash_policy_traits<Policy>, Alloc>;
  using insert_return_type = InsertReturnType<iterator, node_type>;

  raw_hash_set() noexcept(
      std::is_nothrow_default_constructible<hasher>::value&&
          std::is_nothrow_default_constructible<key_equal>::value&&
              std::is_nothrow_default_constructible<allocator_type>::value) {}

  explicit raw_hash_set(size_t bucket_count, const hasher& hash = hasher(),
                        const key_equal& eq = key_equal(),
                        const allocator_type& alloc = allocator_type())
      : ctrl_(EmptyGroup()), settings_(0, hash, eq, alloc) {
    if (bucket_count) {
      capacity_ = NormalizeCapacity(bucket_count);
      initialize_slots();
    }
  }

  raw_hash_set(size_t bucket_count, const hasher& hash,
               const allocator_type& alloc)
      : raw_hash_set(bucket_count, hash, key_equal(), alloc) {}

  raw_hash_set(size_t bucket_count, const allocator_type& alloc)
      : raw_hash_set(bucket_count, hasher(), key_equal(), alloc) {}

  explicit raw_hash_set(const allocator_type& alloc)
      : raw_hash_set(0, hasher(), key_equal(), alloc) {}

  template <class InputIter>
  raw_hash_set(InputIter first, InputIter last, size_t bucket_count = 0,
               const hasher& hash = hasher(), const key_equal& eq = key_equal(),
               const allocator_type& alloc = allocator_type())
      : raw_hash_set(bucket_count, hash, eq, alloc) {
    insert(first, last);
  }

  template <class InputIter>
  raw_hash_set(InputIter first, InputIter last, size_t bucket_count,
               const hasher& hash, const allocator_type& alloc)
      : raw_hash_set(first, last, bucket_count, hash, key_equal(), alloc) {}

  template <class InputIter>
  raw_hash_set(InputIter first, InputIter last, size_t bucket_count,
               const allocator_type& alloc)
      : raw_hash_set(first, last, bucket_count, hasher(), key_equal(), alloc) {}

  template <class InputIter>
  raw_hash_set(InputIter first, InputIter last, const allocator_type& alloc)
      : raw_hash_set(first, last, 0, hasher(), key_equal(), alloc) {}

  // Instead of accepting std::initializer_list<value_type> as the first
  // argument like std::unordered_set<value_type> does, we have two overloads
  // that accept std::initializer_list<T> and std::initializer_list<init_type>.
  // This is advantageous for performance.
  //
  //   // Turns {"abc", "def"} into std::initializer_list<std::string>, then
  //   // copies the strings into the set.
  //   std::unordered_set<std::string> s = {"abc", "def"};
  //
  //   // Turns {"abc", "def"} into std::initializer_list<const char*>, then
  //   // copies the strings into the set.
  //   absl::flat_hash_set<std::string> s = {"abc", "def"};
  //
  // The same trick is used in insert().
  //
  // The enabler is necessary to prevent this constructor from triggering where
  // the copy constructor is meant to be called.
  //
  //   absl::flat_hash_set<int> a, b{a};
  //
  // RequiresNotInit<T> is a workaround for gcc prior to 7.1.
  template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0>
  raw_hash_set(std::initializer_list<T> init, size_t bucket_count = 0,
               const hasher& hash = hasher(), const key_equal& eq = key_equal(),
               const allocator_type& alloc = allocator_type())
      : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {}

  raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count = 0,
               const hasher& hash = hasher(), const key_equal& eq = key_equal(),
               const allocator_type& alloc = allocator_type())
      : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {}

  template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0>
  raw_hash_set(std::initializer_list<T> init, size_t bucket_count,
               const hasher& hash, const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {}

  raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count,
               const hasher& hash, const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {}

  template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0>
  raw_hash_set(std::initializer_list<T> init, size_t bucket_count,
               const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {}

  raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count,
               const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {}

  template <class T, RequiresNotInit<T> = 0, RequiresInsertable<T> = 0>
  raw_hash_set(std::initializer_list<T> init, const allocator_type& alloc)
      : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {}

  raw_hash_set(std::initializer_list<init_type> init,
               const allocator_type& alloc)
      : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {}

  raw_hash_set(const raw_hash_set& that)
      : raw_hash_set(that, AllocTraits::select_on_container_copy_construction(
                               that.alloc_ref())) {}

  raw_hash_set(const raw_hash_set& that, const allocator_type& a)
      : raw_hash_set(0, that.hash_ref(), that.eq_ref(), a) {
    reserve(that.size());
    // Because the table is guaranteed to be empty, we can do something faster
    // than a full `insert`.
    for (const auto& v : that) {
      const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v);
      auto target = find_first_non_full(hash);
      set_ctrl(target.offset, H2(hash));
      emplace_at(target.offset, v);
      infoz_.RecordInsert(hash, target.probe_length);
    }
    size_ = that.size();
    growth_left() -= that.size();
  }

  raw_hash_set(raw_hash_set&& that) noexcept(
      std::is_nothrow_copy_constructible<hasher>::value&&
          std::is_nothrow_copy_constructible<key_equal>::value&&
              std::is_nothrow_copy_constructible<allocator_type>::value)
      : ctrl_(absl::exchange(that.ctrl_, EmptyGroup())),
        slots_(absl::exchange(that.slots_, nullptr)),
        size_(absl::exchange(that.size_, 0)),
        capacity_(absl::exchange(that.capacity_, 0)),
        infoz_(absl::exchange(that.infoz_, HashtablezInfoHandle())),
        // Hash, equality and allocator are copied instead of moved because
        // `that` must be left valid. If Hash is std::function<Key>, moving it
        // would create a nullptr functor that cannot be called.
        settings_(that.settings_) {
    // growth_left was copied above, reset the one from `that`.
    that.growth_left() = 0;
  }

  raw_hash_set(raw_hash_set&& that, const allocator_type& a)
      : ctrl_(EmptyGroup()),
        slots_(nullptr),
        size_(0),
        capacity_(0),
        settings_(0, that.hash_ref(), that.eq_ref(), a) {
    if (a == that.alloc_ref()) {
      std::swap(ctrl_, that.ctrl_);
      std::swap(slots_, that.slots_);
      std::swap(size_, that.size_);
      std::swap(capacity_, that.capacity_);
      std::swap(growth_left(), that.growth_left());
      std::swap(infoz_, that.infoz_);
    } else {
      reserve(that.size());
      // Note: this will copy elements of dense_set and unordered_set instead of
      // moving them. This can be fixed if it ever becomes an issue.
      for (auto& elem : that) insert(std::move(elem));
    }
  }

  raw_hash_set& operator=(const raw_hash_set& that) {
    raw_hash_set tmp(that,
                     AllocTraits::propagate_on_container_copy_assignment::value
                         ? that.alloc_ref()
                         : alloc_ref());
    swap(tmp);
    return *this;
  }

  raw_hash_set& operator=(raw_hash_set&& that) noexcept(
      absl::allocator_traits<allocator_type>::is_always_equal::value&&
          std::is_nothrow_move_assignable<hasher>::value&&
              std::is_nothrow_move_assignable<key_equal>::value) {
    // TODO(sbenza): We should only use the operations from the noexcept clause
    // to make sure we actually adhere to that contract.
    return move_assign(
        std::move(that),
        typename AllocTraits::propagate_on_container_move_assignment());
  }

  ~raw_hash_set() { destroy_slots(); }

  iterator begin() {
    auto it = iterator_at(0);
    it.skip_empty_or_deleted();
    return it;
  }
  iterator end() { return {ctrl_ + capacity_}; }

  const_iterator begin() const {
    return const_cast<raw_hash_set*>(this)->begin();
  }
  const_iterator end() const { return const_cast<raw_hash_set*>(this)->end(); }
  const_iterator cbegin() const { return begin(); }
  const_iterator cend() const { return end(); }

  bool empty() const { return !size(); }
  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }
  size_t max_size() const { return (std::numeric_limits<size_t>::max)(); }

  ABSL_ATTRIBUTE_REINITIALIZES void clear() {
    // Iterating over this container is O(bucket_count()). When bucket_count()
    // is much greater than size(), iteration becomes prohibitively expensive.
    // For clear() it is more important to reuse the allocated array when the
    // container is small because allocation takes a comparatively long time
    // compared to destruction of the elements of the container. So we pick the
    // largest bucket_count() threshold for which iteration is still fast and
    // past that we simply deallocate the array.
    if (capacity_ > 127) {
      destroy_slots();
    } else if (capacity_) {
      for (size_t i = 0; i != capacity_; ++i) {
        if (IsFull(ctrl_[i])) {
          PolicyTraits::destroy(&alloc_ref(), slots_ + i);
        }
      }
      size_ = 0;
      reset_ctrl();
      reset_growth_left();
    }
    assert(empty());
    infoz_.RecordStorageChanged(0, capacity_);
  }

  // This overload kicks in when the argument is an rvalue of insertable and
  // decomposable type other than init_type.
  //
  //   flat_hash_map<std::string, int> m;
  //   m.insert(std::make_pair("abc", 42));
  template <class T, RequiresInsertable<T> = 0,
            typename std::enable_if<IsDecomposable<T>::value, int>::type = 0,
            T* = nullptr>
  std::pair<iterator, bool> insert(T&& value) {
    return emplace(std::forward<T>(value));
  }

  // This overload kicks in when the argument is a bitfield or an lvalue of
  // insertable and decomposable type.
  //
  //   union { int n : 1; };
  //   flat_hash_set<int> s;
  //   s.insert(n);
  //
  //   flat_hash_set<std::string> s;
  //   const char* p = "hello";
  //   s.insert(p);
  //
  // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace
  // RequiresInsertable<T> with RequiresInsertable<const T&>.
  // We are hitting this bug: https://godbolt.org/g/1Vht4f.
  template <
      class T, RequiresInsertable<T> = 0,
      typename std::enable_if<IsDecomposable<const T&>::value, int>::type = 0>
  std::pair<iterator, bool> insert(const T& value) {
    return emplace(value);
  }

  // This overload kicks in when the argument is an rvalue of init_type. Its
  // purpose is to handle brace-init-list arguments.
  //
  //   flat_hash_map<std::string, int> s;
  //   s.insert({"abc", 42});
  std::pair<iterator, bool> insert(init_type&& value) {
    return emplace(std::move(value));
  }

  template <class T, RequiresInsertable<T> = 0,
            typename std::enable_if<IsDecomposable<T>::value, int>::type = 0,
            T* = nullptr>
  iterator insert(const_iterator, T&& value) {
    return insert(std::forward<T>(value)).first;
  }

  // TODO(romanp): Once we stop supporting gcc 5.1 and below, replace
  // RequiresInsertable<T> with RequiresInsertable<const T&>.
  // We are hitting this bug: https://godbolt.org/g/1Vht4f.
  template <
      class T, RequiresInsertable<T> = 0,
      typename std::enable_if<IsDecomposable<const T&>::value, int>::type = 0>
  iterator insert(const_iterator, const T& value) {
    return insert(value).first;
  }

  iterator insert(const_iterator, init_type&& value) {
    return insert(std::move(value)).first;
  }

  template <class InputIt>
  void insert(InputIt first, InputIt last) {
    for (; first != last; ++first) insert(*first);
  }

  template <class T, RequiresNotInit<T> = 0, RequiresInsertable<const T&> = 0>
  void insert(std::initializer_list<T> ilist) {
    insert(ilist.begin(), ilist.end());
  }

  void insert(std::initializer_list<init_type> ilist) {
    insert(ilist.begin(), ilist.end());
  }

  insert_return_type insert(node_type&& node) {
    if (!node) return {end(), false, node_type()};
    const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node));
    auto res = PolicyTraits::apply(
        InsertSlot<false>{*this, std::move(*CommonAccess::GetSlot(node))},
        elem);
    if (res.second) {
      CommonAccess::Reset(&node);
      return {res.first, true, node_type()};
    } else {
      return {res.first, false, std::move(node)};
    }
  }

  iterator insert(const_iterator, node_type&& node) {
    return insert(std::move(node)).first;
  }

  // This overload kicks in if we can deduce the key from args. This enables us
  // to avoid constructing value_type if an entry with the same key already
  // exists.
  //
  // For example:
  //
  //   flat_hash_map<std::string, std::string> m = {{"abc", "def"}};
  //   // Creates no std::string copies and makes no heap allocations.
  //   m.emplace("abc", "xyz");
  template <class... Args, typename std::enable_if<
                               IsDecomposable<Args...>::value, int>::type = 0>
  std::pair<iterator, bool> emplace(Args&&... args) {
    return PolicyTraits::apply(EmplaceDecomposable{*this},
                               std::forward<Args>(args)...);
  }

  // This overload kicks in if we cannot deduce the key from args. It constructs
  // value_type unconditionally and then either moves it into the table or
  // destroys it.
  template <class... Args, typename std::enable_if<
                               !IsDecomposable<Args...>::value, int>::type = 0>
  std::pair<iterator, bool> emplace(Args&&... args) {
    typename std::aligned_storage<sizeof(slot_type), alignof(slot_type)>::type
        raw;
    slot_type* slot = reinterpret_cast<slot_type*>(&raw);

    PolicyTraits::construct(&alloc_ref(), slot, std::forward<Args>(args)...);
    const auto& elem = PolicyTraits::element(slot);
    return PolicyTraits::apply(InsertSlot<true>{*this, std::move(*slot)}, elem);
  }

  template <class... Args>
  iterator emplace_hint(const_iterator, Args&&... args) {
    return emplace(std::forward<Args>(args)...).first;
  }

  // Extension API: support for lazy emplace.
  //
  // Looks up key in the table. If found, returns the iterator to the element.
  // Otherwise calls f with one argument of type raw_hash_set::constructor. f
  // MUST call raw_hash_set::constructor with arguments as if a
  // raw_hash_set::value_type is constructed, otherwise the behavior is
  // undefined.
  //
  // For example:
  //
  //   std::unordered_set<ArenaString> s;
  //   // Makes ArenaString even if "abc" is in the map.
  //   s.insert(ArenaString(&arena, "abc"));
  //
  //   flat_hash_set<ArenaString> s;
  //   // Makes ArenaString only if "abc" is not in the map.
  //   s.lazy_emplace("abc", [&](const constructor& ctor) {
  //     ctor(&arena, "abc");
  //   });
  //
  // WARNING: This API is currently experimental. If there is a way to implement
  // the same thing with the rest of the API, prefer that.
  class constructor {
    friend class raw_hash_set;

   public:
    template <class... Args>
    void operator()(Args&&... args) const {
      assert(*slot_);
      PolicyTraits::construct(alloc_, *slot_, std::forward<Args>(args)...);
      *slot_ = nullptr;
    }

   private:
    constructor(allocator_type* a, slot_type** slot) : alloc_(a), slot_(slot) {}

    allocator_type* alloc_;
    slot_type** slot_;
  };

  template <class K = key_type, class F>
  iterator lazy_emplace(const key_arg<K>& key, F&& f) {
    auto res = find_or_prepare_insert(key);
    if (res.second) {
      slot_type* slot = slots_ + res.first;
      std::forward<F>(f)(constructor(&alloc_ref(), &slot));
      assert(!slot);
    }
    return iterator_at(res.first);
  }

  // Extension API: support for heterogeneous keys.
  //
  //   std::unordered_set<std::string> s;
  //   // Turns "abc" into std::string.
  //   s.erase("abc");
  //
  //   flat_hash_set<std::string> s;
  //   // Uses "abc" directly without copying it into std::string.
  //   s.erase("abc");
  template <class K = key_type>
  size_type erase(const key_arg<K>& key) {
    auto it = find(key);
    if (it == end()) return 0;
    erase(it);
    return 1;
  }

  // Erases the element pointed to by `it`. Unlike `std::unordered_set::erase`,
  // this method returns void to reduce algorithmic complexity to O(1). In
  // order to erase while iterating across a map, use the following idiom (which
  // also works for standard containers):
  //
  // for (auto it = m.begin(), end = m.end(); it != end;) {
  //   if (<pred>) {
  //     m.erase(it++);
  //   } else {
  //     ++it;
  //   }
  // }
  void erase(const_iterator cit) { erase(cit.inner_); }

  // This overload is necessary because otherwise erase<K>(const K&) would be
  // a better match if non-const iterator is passed as an argument.
  void erase(iterator it) {
    assert(it != end());
    PolicyTraits::destroy(&alloc_ref(), it.slot_);
    erase_meta_only(it);
  }

  iterator erase(const_iterator first, const_iterator last) {
    while (first != last) {
      erase(first++);
    }
    return last.inner_;
  }

  // Moves elements from `src` into `this`.
  // If the element already exists in `this`, it is left unmodified in `src`.
  template <typename H, typename E>
  void merge(raw_hash_set<Policy, H, E, Alloc>& src) {  // NOLINT
    assert(this != &src);
    for (auto it = src.begin(), e = src.end(); it != e; ++it) {
      if (PolicyTraits::apply(InsertSlot<false>{*this, std::move(*it.slot_)},
                              PolicyTraits::element(it.slot_))
              .second) {
        src.erase_meta_only(it);
      }
    }
  }

  template <typename H, typename E>
  void merge(raw_hash_set<Policy, H, E, Alloc>&& src) {
    merge(src);
  }

  node_type extract(const_iterator position) {
    auto node =
        CommonAccess::Make<node_type>(alloc_ref(), position.inner_.slot_);
    erase_meta_only(position);
    return node;
  }

  template <
      class K = key_type,
      typename std::enable_if<!std::is_same<K, iterator>::value, int>::type = 0>
  node_type extract(const key_arg<K>& key) {
    auto it = find(key);
    return it == end() ? node_type() : extract(const_iterator{it});
  }
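
  // A usage sketch (illustrative only): moving an element between two tables
  // without copying it:
  //
  //   auto node = src.extract("abc");
  //   if (!node.empty()) dst.insert(std::move(node));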

  void swap(raw_hash_set& that) noexcept(
      IsNoThrowSwappable<hasher>() && IsNoThrowSwappable<key_equal>() &&
      (!AllocTraits::propagate_on_container_swap::value ||
       IsNoThrowSwappable<allocator_type>())) {
    using std::swap;
    swap(ctrl_, that.ctrl_);
    swap(slots_, that.slots_);
    swap(size_, that.size_);
    swap(capacity_, that.capacity_);
    swap(growth_left(), that.growth_left());
    swap(hash_ref(), that.hash_ref());
    swap(eq_ref(), that.eq_ref());
    swap(infoz_, that.infoz_);
    if (AllocTraits::propagate_on_container_swap::value) {
      swap(alloc_ref(), that.alloc_ref());
    } else {
      // If the allocators do not compare equal it is officially undefined
      // behavior. We choose to do nothing.
    }
  }

  void rehash(size_t n) {
    if (n == 0 && capacity_ == 0) return;
    if (n == 0 && size_ == 0) {
      destroy_slots();
      infoz_.RecordStorageChanged(0, 0);
      return;
    }
    // bitor is a faster way of doing `max` here. We will round up to the next
    // power-of-2-minus-1, so bitor is good enough.
    auto m = NormalizeCapacity(n | GrowthToLowerboundCapacity(size()));
    // n == 0 unconditionally rehashes as per the standard.
    if (n == 0 || m > capacity_) {
      resize(m);
    }
  }

  void reserve(size_t n) { rehash(GrowthToLowerboundCapacity(n)); }

  // Extension API: support for heterogeneous keys.
  //
  //   std::unordered_set<std::string> s;
  //   // Turns "abc" into std::string.
  //   s.count("abc");
  //
  //   ch_set<std::string> s;
  //   // Uses "abc" directly without copying it into std::string.
  //   s.count("abc");
  template <class K = key_type>
  size_t count(const key_arg<K>& key) const {
    return find(key) == end() ? 0 : 1;
  }

  // Issues CPU prefetch instructions for the memory needed to find or insert
  // a key. Like all lookup functions, this supports heterogeneous keys.
  //
  // NOTE: This is a very low level operation and should not be used without
  // specific benchmarks indicating its importance.
  template <class K = key_type>
  void prefetch(const key_arg<K>& key) const {
    (void)key;
#if defined(__GNUC__)
    auto seq = probe(hash_ref()(key));
    __builtin_prefetch(static_cast<const void*>(ctrl_ + seq.offset()));
    __builtin_prefetch(static_cast<const void*>(slots_ + seq.offset()));
#endif  // __GNUC__
  }

  // The API of find() has two extensions.
  //
  // 1. The hash can be passed by the user. It must be equal to the hash of the
  // key.
  //
  // 2. The type of the key argument doesn't have to be key_type. This is
  // so-called heterogeneous key support.
  template <class K = key_type>
  iterator find(const key_arg<K>& key, size_t hash) {
    auto seq = probe(hash);
    while (true) {
      Group g{ctrl_ + seq.offset()};
      for (int i : g.Match(H2(hash))) {
        if (ABSL_PREDICT_TRUE(PolicyTraits::apply(
                EqualElement<K>{key, eq_ref()},
                PolicyTraits::element(slots_ + seq.offset(i)))))
          return iterator_at(seq.offset(i));
      }
      if (ABSL_PREDICT_TRUE(g.MatchEmpty())) return end();
      seq.next();
    }
  }
  template <class K = key_type>
  iterator find(const key_arg<K>& key) {
    return find(key, hash_ref()(key));
  }

  template <class K = key_type>
  const_iterator find(const key_arg<K>& key, size_t hash) const {
    return const_cast<raw_hash_set*>(this)->find(key, hash);
  }
  template <class K = key_type>
  const_iterator find(const key_arg<K>& key) const {
    return find(key, hash_ref()(key));
  }

  template <class K = key_type>
  bool contains(const key_arg<K>& key) const {
    return find(key) != end();
  }

  template <class K = key_type>
  std::pair<iterator, iterator> equal_range(const key_arg<K>& key) {
    auto it = find(key);
    if (it != end()) return {it, std::next(it)};
    return {it, it};
  }
  template <class K = key_type>
  std::pair<const_iterator, const_iterator> equal_range(
      const key_arg<K>& key) const {
    auto it = find(key);
    if (it != end()) return {it, std::next(it)};
    return {it, it};
  }

  size_t bucket_count() const { return capacity_; }
  float load_factor() const {
    return capacity_ ? static_cast<double>(size()) / capacity_ : 0.0;
  }
  float max_load_factor() const { return 1.0f; }
  void max_load_factor(float) {
    // Does nothing.
  }

  hasher hash_function() const { return hash_ref(); }
  key_equal key_eq() const { return eq_ref(); }
  allocator_type get_allocator() const { return alloc_ref(); }

  friend bool operator==(const raw_hash_set& a, const raw_hash_set& b) {
    if (a.size() != b.size()) return false;
    const raw_hash_set* outer = &a;
    const raw_hash_set* inner = &b;
    if (outer->capacity() > inner->capacity()) std::swap(outer, inner);
    for (const value_type& elem : *outer)
      if (!inner->has_element(elem)) return false;
    return true;
  }

  friend bool operator!=(const raw_hash_set& a, const raw_hash_set& b) {
    return !(a == b);
  }

  friend void swap(raw_hash_set& a,
                   raw_hash_set& b) noexcept(noexcept(a.swap(b))) {
    a.swap(b);
  }

 private:
  template <class Container, typename Enabler>
  friend struct absl::container_internal::hashtable_debug_internal::
      HashtableDebugAccess;

  struct FindElement {
    template <class K, class... Args>
    const_iterator operator()(const K& key, Args&&...) const {
      return s.find(key);
    }
    const raw_hash_set& s;
  };

  struct HashElement {
    template <class K, class... Args>
    size_t operator()(const K& key, Args&&...) const {
      return h(key);
    }
    const hasher& h;
  };

  template <class K1>
  struct EqualElement {
    template <class K2, class... Args>
    bool operator()(const K2& lhs, Args&&...) const {
      return eq(lhs, rhs);
    }
    const K1& rhs;
    const key_equal& eq;
  };

  struct EmplaceDecomposable {
    template <class K, class... Args>
    std::pair<iterator, bool> operator()(const K& key, Args&&... args) const {
      auto res = s.find_or_prepare_insert(key);
      if (res.second) {
        s.emplace_at(res.first, std::forward<Args>(args)...);
      }
      return {s.iterator_at(res.first), res.second};
    }
    raw_hash_set& s;
  };

  template <bool do_destroy>
  struct InsertSlot {
    template <class K, class... Args>
    std::pair<iterator, bool> operator()(const K& key, Args&&...) && {
      auto res = s.find_or_prepare_insert(key);
      if (res.second) {
        PolicyTraits::transfer(&s.alloc_ref(), s.slots_ + res.first, &slot);
      } else if (do_destroy) {
        PolicyTraits::destroy(&s.alloc_ref(), &slot);
      }
      return {s.iterator_at(res.first), res.second};
    }
    raw_hash_set& s;
    // Constructed slot. Either moved into place or destroyed.
    slot_type&& slot;
  };
1413 // "erases" the object from the container, except that it doesn't actually
1414 // destroy the object. It only updates all the metadata of the class.
1415 // This can be used in conjunction with Policy::transfer to move the object to
1417 void erase_meta_only(const_iterator it) {
1418 assert(IsFull(*it.inner_.ctrl_) && "erasing a dangling iterator");
1420 const size_t index = it.inner_.ctrl_ - ctrl_;
1421 const size_t index_before = (index - Group::kWidth) & capacity_;
1422 const auto empty_after = Group(it.inner_.ctrl_).MatchEmpty();
1423 const auto empty_before = Group(ctrl_ + index_before).MatchEmpty();
1425 // We count how many consecutive non empties we have to the right and to the
1426 // left of `it`. If the sum is >= kWidth then there is at least one probe
1427 // window that might have seen a full group.
1428 bool was_never_full =
1429 empty_before && empty_after &&
1430 static_cast<size_t>(empty_after.TrailingZeros() +
1431 empty_before.LeadingZeros()) < Group::kWidth;
1433 set_ctrl(index, was_never_full ? kEmpty : kDeleted);
1434 growth_left() += was_never_full;
1435 infoz_.RecordErase();

  void initialize_slots() {
    assert(capacity_);
    // Folks with custom allocators often make unwarranted assumptions about the
    // behavior of their classes vis-a-vis trivial destructability and what
    // calls they will or won't make. Avoid sampling for people with custom
    // allocators to get us out of this mess. This is not a hard guarantee but
    // a workaround while we plan the exact guarantee we want to provide.
    //
    // People are often sloppy with the exact type of their allocator (sometimes
    // it has an extra const or is missing the pair, but rebinds made it work
    // anyway). To avoid the ambiguity, we work off SlotAlloc which we have
    // bound more carefully.
    if (std::is_same<SlotAlloc, std::allocator<slot_type>>::value &&
        slots_ == nullptr) {
      infoz_ = Sample();
    }

    auto layout = MakeLayout(capacity_);
    char* mem = static_cast<char*>(
        Allocate<Layout::Alignment()>(&alloc_ref(), layout.AllocSize()));
    ctrl_ = reinterpret_cast<ctrl_t*>(layout.template Pointer<0>(mem));
    slots_ = layout.template Pointer<1>(mem);
    reset_ctrl();
    reset_growth_left();
    infoz_.RecordStorageChanged(size_, capacity_);
  }

  void destroy_slots() {
    if (!capacity_) return;
    for (size_t i = 0; i != capacity_; ++i) {
      if (IsFull(ctrl_[i])) {
        PolicyTraits::destroy(&alloc_ref(), slots_ + i);
      }
    }
    auto layout = MakeLayout(capacity_);
    // Unpoison before returning the memory to the allocator.
    SanitizerUnpoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_);
    Deallocate<Layout::Alignment()>(&alloc_ref(), ctrl_, layout.AllocSize());
    ctrl_ = EmptyGroup();
    slots_ = nullptr;
    size_ = 0;
    capacity_ = 0;
    growth_left() = 0;
  }

  void resize(size_t new_capacity) {
    assert(IsValidCapacity(new_capacity));
    auto* old_ctrl = ctrl_;
    auto* old_slots = slots_;
    const size_t old_capacity = capacity_;
    capacity_ = new_capacity;
    initialize_slots();

    size_t total_probe_length = 0;
    for (size_t i = 0; i != old_capacity; ++i) {
      if (IsFull(old_ctrl[i])) {
        size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                          PolicyTraits::element(old_slots + i));
        auto target = find_first_non_full(hash);
        size_t new_i = target.offset;
        total_probe_length += target.probe_length;
        set_ctrl(new_i, H2(hash));
        PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, old_slots + i);
      }
    }
    if (old_capacity) {
      SanitizerUnpoisonMemoryRegion(old_slots,
                                    sizeof(slot_type) * old_capacity);
      auto layout = MakeLayout(old_capacity);
      Deallocate<Layout::Alignment()>(&alloc_ref(), old_ctrl,
                                      layout.AllocSize());
    }
    infoz_.RecordRehash(total_probe_length);
  }

  void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE {
    assert(IsValidCapacity(capacity_));
    assert(!is_small());
    // Algorithm:
    // - mark all DELETED slots as EMPTY
    // - mark all FULL slots as DELETED
    // - for each slot marked as DELETED
    //     hash = Hash(element)
    //     target = find_first_non_full(hash)
    //     if target is in the same group
    //       mark slot as FULL
    //     else if target is EMPTY
    //       transfer element to target
    //       mark slot as EMPTY
    //       mark target as FULL
    //     else if target is DELETED
    //       swap current element with target element
    //       mark target as FULL
    //       repeat procedure for current slot with moved from element (target)
    ConvertDeletedToEmptyAndFullToDeleted(ctrl_, capacity_);
    typename std::aligned_storage<sizeof(slot_type), alignof(slot_type)>::type
        raw;
    size_t total_probe_length = 0;
    slot_type* slot = reinterpret_cast<slot_type*>(&raw);
    for (size_t i = 0; i != capacity_; ++i) {
      if (!IsDeleted(ctrl_[i])) continue;
      size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                        PolicyTraits::element(slots_ + i));
      auto target = find_first_non_full(hash);
      size_t new_i = target.offset;
      total_probe_length += target.probe_length;

      // Verify if the old and new i fall within the same group wrt the hash.
      // If they do, we don't need to move the object as it falls already in the
      // best probe we can.
      const auto probe_index = [&](size_t pos) {
        return ((pos - probe(hash).offset()) & capacity_) / Group::kWidth;
      };

      // Element doesn't move.
      if (ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
        set_ctrl(i, H2(hash));
        continue;
      }
      if (IsEmpty(ctrl_[new_i])) {
        // Transfer element to the empty spot.
        // set_ctrl poisons/unpoisons the slots so we have to call it at the
        // right time.
        set_ctrl(new_i, H2(hash));
        PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slots_ + i);
        set_ctrl(i, kEmpty);
      } else {
        assert(IsDeleted(ctrl_[new_i]));
        set_ctrl(new_i, H2(hash));
        // Until we are done rehashing, DELETED marks previously FULL slots.
        // Swap i and new_i elements.
        PolicyTraits::transfer(&alloc_ref(), slot, slots_ + i);
        PolicyTraits::transfer(&alloc_ref(), slots_ + i, slots_ + new_i);
        PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, slot);
        --i;  // repeat
      }
    }
    reset_growth_left();
    infoz_.RecordRehash(total_probe_length);
  }

  void rehash_and_grow_if_necessary() {
    if (capacity_ == 0) {
      resize(1);
    } else if (size() <= CapacityToGrowth(capacity()) / 2) {
      // Squash DELETED without growing if there is enough capacity.
      drop_deletes_without_resize();
    } else {
      // Otherwise grow the container.
      resize(capacity_ * 2 + 1);
    }
  }

  bool has_element(const value_type& elem) const {
    size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, elem);
    auto seq = probe(hash);
    while (true) {
      Group g{ctrl_ + seq.offset()};
      for (int i : g.Match(H2(hash))) {
        if (ABSL_PREDICT_TRUE(PolicyTraits::element(slots_ + seq.offset(i)) ==
                              elem))
          return true;
      }
      if (ABSL_PREDICT_TRUE(g.MatchEmpty())) return false;
      seq.next();
      assert(seq.index() < capacity_ && "full table!");
    }
    return false;
  }

  // Probes the raw_hash_set with the probe sequence for hash and returns the
  // pointer to the first empty or deleted slot.
  // NOTE: this function must work with tables having both kEmpty and kDeleted
  // in one group. Such tables appear during drop_deletes_without_resize.
  //
  // This function is very useful when insertions happen and:
  // - the input is already a set
  // - there are enough slots
  // - the element with the hash is not in the table
  struct FindInfo {
    size_t offset;
    size_t probe_length;
  };
  FindInfo find_first_non_full(size_t hash) {
    auto seq = probe(hash);
    while (true) {
      Group g{ctrl_ + seq.offset()};
      auto mask = g.MatchEmptyOrDeleted();
      if (mask) {
#if !defined(NDEBUG)
        // We want to add entropy even when ASLR is not enabled.
        // In debug build we will randomly insert in either the front or back of
        // the group.
        // TODO(kfm,sbenza): revisit after we do unconditional mixing
        if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) {
          return {seq.offset(mask.HighestBitSet()), seq.index()};
        }
#endif
        return {seq.offset(mask.LowestBitSet()), seq.index()};
      }
      assert(seq.index() < capacity_ && "full table!");
      seq.next();
    }
  }

  // TODO(alkis): Optimize this assuming *this and that don't overlap.
  raw_hash_set& move_assign(raw_hash_set&& that, std::true_type) {
    raw_hash_set tmp(std::move(that));
    swap(tmp);
    return *this;
  }
  raw_hash_set& move_assign(raw_hash_set&& that, std::false_type) {
    raw_hash_set tmp(std::move(that), alloc_ref());
    swap(tmp);
    return *this;
  }

 protected:
  template <class K>
  std::pair<size_t, bool> find_or_prepare_insert(const K& key) {
    auto hash = hash_ref()(key);
    auto seq = probe(hash);
    while (true) {
      Group g{ctrl_ + seq.offset()};
      for (int i : g.Match(H2(hash))) {
        if (ABSL_PREDICT_TRUE(PolicyTraits::apply(
                EqualElement<K>{key, eq_ref()},
                PolicyTraits::element(slots_ + seq.offset(i)))))
          return {seq.offset(i), false};
      }
      if (ABSL_PREDICT_TRUE(g.MatchEmpty())) break;
      seq.next();
    }
    return {prepare_insert(hash), true};
  }

  size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE {
    auto target = find_first_non_full(hash);
    if (ABSL_PREDICT_FALSE(growth_left() == 0 &&
                           !IsDeleted(ctrl_[target.offset]))) {
      rehash_and_grow_if_necessary();
      target = find_first_non_full(hash);
    }
    ++size_;
    growth_left() -= IsEmpty(ctrl_[target.offset]);
    set_ctrl(target.offset, H2(hash));
    infoz_.RecordInsert(hash, target.probe_length);
    return target.offset;
  }

  // Constructs the value in the space pointed by the iterator. This only works
  // after an unsuccessful find_or_prepare_insert() and before any other
  // modifications happen in the raw_hash_set.
  //
  // PRECONDITION: i is an index returned from find_or_prepare_insert(k), where
  // k is the key decomposed from `forward<Args>(args)...`, and the bool
  // returned by find_or_prepare_insert(k) was true.
  // POSTCONDITION: *m.iterator_at(i) == value_type(forward<Args>(args)...).
  template <class... Args>
  void emplace_at(size_t i, Args&&... args) {
    PolicyTraits::construct(&alloc_ref(), slots_ + i,
                            std::forward<Args>(args)...);

    assert(PolicyTraits::apply(FindElement{*this}, *iterator_at(i)) ==
               iterator_at(i) &&
           "constructed value does not match the lookup key");
  }

  iterator iterator_at(size_t i) { return {ctrl_ + i, slots_ + i}; }
  const_iterator iterator_at(size_t i) const { return {ctrl_ + i, slots_ + i}; }

 private:
  friend struct RawHashSetTestOnlyAccess;

  probe_seq<Group::kWidth> probe(size_t hash) const {
    return probe_seq<Group::kWidth>(H1(hash, ctrl_), capacity_);
  }

  // Reset all ctrl bytes back to kEmpty, except the sentinel.
  void reset_ctrl() {
    std::memset(ctrl_, kEmpty, capacity_ + Group::kWidth);
    ctrl_[capacity_] = kSentinel;
    SanitizerPoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_);
  }

  void reset_growth_left() {
    growth_left() = CapacityToGrowth(capacity()) - size_;
  }

  // Sets the control byte, and if `i < Group::kWidth`, set the cloned byte at
  // the end too.
  void set_ctrl(size_t i, ctrl_t h) {
    assert(i < capacity_);

    if (IsFull(h)) {
      SanitizerUnpoisonObject(slots_ + i);
    } else {
      SanitizerPoisonObject(slots_ + i);
    }

    ctrl_[i] = h;
    ctrl_[((i - Group::kWidth) & capacity_) + 1 +
          ((Group::kWidth - 1) & capacity_)] = h;
  }

  size_t& growth_left() { return settings_.template get<0>(); }

  // The representation of the object has two modes:
  //  - small: For capacities < kWidth-1
  //  - large: For the rest.
  //
  // Differences:
  //  - In small mode we are able to use the whole capacity. The extra control
  //  bytes give us at least one "empty" control byte to stop the iteration.
  //  This is important to make 1 a valid capacity.
  //
  //  - In small mode only the first `capacity()` control bytes after the
  //  sentinel are valid. The rest contain dummy kEmpty values that do not
  //  represent a real slot. This is important to take into account on
  //  find_first_non_full(), where we never try ShouldInsertBackwards() for
  //  small tables.
  bool is_small() const { return capacity_ < Group::kWidth - 1; }
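
  // For example (an illustrative note): with Group::kWidth == 16, a table with
  // capacity_ == 7 is in small mode; all 7 slots are usable and every lookup
  // is resolved within a single group load.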

  hasher& hash_ref() { return settings_.template get<1>(); }
  const hasher& hash_ref() const { return settings_.template get<1>(); }
  key_equal& eq_ref() { return settings_.template get<2>(); }
  const key_equal& eq_ref() const { return settings_.template get<2>(); }
  allocator_type& alloc_ref() { return settings_.template get<3>(); }
  const allocator_type& alloc_ref() const {
    return settings_.template get<3>();
  }

  // TODO(alkis): Investigate removing some of these fields:
  // - ctrl/slots can be derived from each other
  // - size can be moved into the slot array
  ctrl_t* ctrl_ = EmptyGroup();  // [(capacity + 1) * ctrl_t]
  slot_type* slots_ = nullptr;   // [capacity * slot_type]
  size_t size_ = 0;              // number of full slots
  size_t capacity_ = 0;          // total number of slots
  HashtablezInfoHandle infoz_;
  absl::container_internal::CompressedTuple<size_t /* growth_left */, hasher,
                                            key_equal, allocator_type>
      settings_{0, hasher{}, key_equal{}, allocator_type{}};
};

namespace hashtable_debug_internal {
template <typename Set>
struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
  using Traits = typename Set::PolicyTraits;
  using Slot = typename Traits::slot_type;

  static size_t GetNumProbes(const Set& set,
                             const typename Set::key_type& key) {
    size_t num_probes = 0;
    size_t hash = set.hash_ref()(key);
    auto seq = set.probe(hash);
    while (true) {
      container_internal::Group g{set.ctrl_ + seq.offset()};
      for (int i : g.Match(container_internal::H2(hash))) {
        if (Traits::apply(
                typename Set::template EqualElement<typename Set::key_type>{
                    key, set.eq_ref()},
                Traits::element(set.slots_ + seq.offset(i))))
          return num_probes;
        ++num_probes;
      }
      if (g.MatchEmpty()) return num_probes;
      seq.next();
      ++num_probes;
    }
  }

  static size_t AllocatedByteSize(const Set& c) {
    size_t capacity = c.capacity_;
    if (capacity == 0) return 0;
    auto layout = Set::MakeLayout(capacity);
    size_t m = layout.AllocSize();

    size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr));
    if (per_slot != ~size_t{}) {
      m += per_slot * c.size();
    } else {
      for (size_t i = 0; i != capacity; ++i) {
        if (container_internal::IsFull(c.ctrl_[i])) {
          m += Traits::space_used(c.slots_ + i);
        }
      }
    }
    return m;
  }

  static size_t LowerBoundAllocatedByteSize(size_t size) {
    size_t capacity = GrowthToLowerboundCapacity(size);
    if (capacity == 0) return 0;
    auto layout = Set::MakeLayout(NormalizeCapacity(capacity));
    size_t m = layout.AllocSize();
    size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr));
    if (per_slot != ~size_t{}) {
      m += per_slot * size;
    }
    return m;
  }
};

}  // namespace hashtable_debug_internal
}  // namespace container_internal
}  // namespace absl

#endif  // ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_