Skip to content

Commit 0b1e6d4

Browse files
Abseil Teamastrelni
Abseil Team
authored andcommitted
Export of internal Abseil changes.
-- 461c1b6eb19490429db3bc6dd10ee32df9429cd7 by Samuel Benzaquen <sbenza@google.com>: Group all the capacity/growth calculation in one place. This helps remove the unnecessary floating point operations. PiperOrigin-RevId: 229928140 GitOrigin-RevId: 461c1b6eb19490429db3bc6dd10ee32df9429cd7 Change-Id: Ib00f85a6033fcd06a1d38a5987670b1524a80f93
1 parent efccc50 commit 0b1e6d4

File tree

2 files changed

+66
-25
lines changed

2 files changed

+66
-25
lines changed

absl/container/internal/raw_hash_set.h

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,28 @@ inline size_t NormalizeCapacity(size_t n) {
480480
: (std::numeric_limits<size_t>::max)() >> LeadingZeros(n);
481481
}
482482

483+
// We use 7/8th as maximum load factor.
484+
// For 16-wide groups, that gives an average of two empty slots per group.
485+
inline size_t CapacityToGrowth(size_t capacity) {
486+
assert(IsValidCapacity(capacity));
487+
// `capacity*7/8`
488+
if (Group::kWidth == 8 && capacity == 7) {
489+
// x-x/8 does not work when x==7.
490+
return 6;
491+
}
492+
return capacity - capacity / 8;
493+
}
494+
// From desired "growth" to a lowerbound of the necessary capacity.
495+
// Might not be a valid one and required NormalizeCapacity().
496+
inline size_t GrowthToLowerboundCapacity(size_t growth) {
497+
// `growth*8/7`
498+
if (Group::kWidth == 8 && growth == 7) {
499+
// x+(x-1)/7 does not work when x==7.
500+
return 8;
501+
}
502+
return growth + static_cast<size_t>((static_cast<int64_t>(growth) - 1) / 7);
503+
}
504+
483505
// The node_handle concept from C++17.
484506
// We specialize node_handle for sets and maps. node_handle_base holds the
485507
// common API of both.
@@ -819,7 +841,7 @@ class raw_hash_set {
819841
: ctrl_(EmptyGroup()), settings_(0, hash, eq, alloc) {
820842
if (bucket_count) {
821843
capacity_ = NormalizeCapacity(bucket_count);
822-
growth_left() = static_cast<size_t>(capacity_ * kMaxLoadFactor);
844+
reset_growth_left();
823845
initialize_slots();
824846
}
825847
}
@@ -1031,7 +1053,7 @@ class raw_hash_set {
10311053
}
10321054
size_ = 0;
10331055
reset_ctrl();
1034-
growth_left() = static_cast<size_t>(capacity_ * kMaxLoadFactor);
1056+
reset_growth_left();
10351057
}
10361058
assert(empty());
10371059
infoz_.RecordStorageChanged(size_, capacity_);
@@ -1325,16 +1347,16 @@ class raw_hash_set {
13251347
infoz_.RecordStorageChanged(size_, capacity_);
13261348
return;
13271349
}
1328-
auto m = NormalizeCapacity((std::max)(n, NumSlotsFast(size())));
1350+
// bitor is a faster way of doing `max` here. We will round up to the next
1351+
// power-of-2-minus-1, so bitor is good enough.
1352+
auto m = NormalizeCapacity(n | GrowthToLowerboundCapacity(size()));
13291353
// n == 0 unconditionally rehashes as per the standard.
13301354
if (n == 0 || m > capacity_) {
13311355
resize(m);
13321356
}
13331357
}
13341358

1335-
void reserve(size_t n) {
1336-
rehash(NumSlotsFast(n));
1337-
}
1359+
void reserve(size_t n) { rehash(GrowthToLowerboundCapacity(n)); }
13381360

13391361
// Extension API: support for heterogeneous keys.
13401362
//
@@ -1512,13 +1534,6 @@ class raw_hash_set {
15121534
slot_type&& slot;
15131535
};
15141536

1515-
// Computes std::ceil(n / kMaxLoadFactor). Faster than calling std::ceil.
1516-
static inline size_t NumSlotsFast(size_t n) {
1517-
return static_cast<size_t>(
1518-
(n * kMaxLoadFactorDenominator + (kMaxLoadFactorNumerator - 1)) /
1519-
kMaxLoadFactorNumerator);
1520-
}
1521-
15221537
// "erases" the object from the container, except that it doesn't actually
15231538
// destroy the object. It only updates all the metadata of the class.
15241539
// This can be used in conjunction with Policy::transfer to move the object to
@@ -1556,7 +1571,7 @@ class raw_hash_set {
15561571
ctrl_ = reinterpret_cast<ctrl_t*>(layout.template Pointer<0>(mem));
15571572
slots_ = layout.template Pointer<1>(mem);
15581573
reset_ctrl();
1559-
growth_left() = static_cast<size_t>(capacity_ * kMaxLoadFactor) - size_;
1574+
reset_growth_left();
15601575
infoz_.RecordStorageChanged(size_, capacity_);
15611576
}
15621577

@@ -1662,13 +1677,13 @@ class raw_hash_set {
16621677
--i; // repeat
16631678
}
16641679
}
1665-
growth_left() = static_cast<size_t>(capacity_ * kMaxLoadFactor) - size_;
1680+
reset_growth_left();
16661681
}
16671682

16681683
void rehash_and_grow_if_necessary() {
16691684
if (capacity_ == 0) {
16701685
resize(Group::kWidth - 1);
1671-
} else if (size() <= kMaxLoadFactor / 2 * capacity_) {
1686+
} else if (size() <= CapacityToGrowth(capacity()) / 2) {
16721687
// Squash DELETED without growing if there is enough capacity.
16731688
drop_deletes_without_resize();
16741689
} else {
@@ -1811,6 +1826,10 @@ class raw_hash_set {
18111826
SanitizerPoisonMemoryRegion(slots_, sizeof(slot_type) * capacity_);
18121827
}
18131828

1829+
void reset_growth_left() {
1830+
growth_left() = CapacityToGrowth(capacity()) - size_;
1831+
}
1832+
18141833
// Sets the control byte, and if `i < Group::kWidth`, set the cloned byte at
18151834
// the end too.
18161835
void set_ctrl(size_t i, ctrl_t h) {
@@ -1837,12 +1856,6 @@ class raw_hash_set {
18371856
return settings_.template get<3>();
18381857
}
18391858

1840-
// On average each group has 2 empty slot (for the vectorized case).
1841-
static constexpr int64_t kMaxLoadFactorNumerator = 14;
1842-
static constexpr int64_t kMaxLoadFactorDenominator = 16;
1843-
static constexpr float kMaxLoadFactor =
1844-
1.0 * kMaxLoadFactorNumerator / kMaxLoadFactorDenominator;
1845-
18461859
// TODO(alkis): Investigate removing some of these fields:
18471860
// - ctrl/slots can be derived from each other
18481861
// - size can be moved into the slot array
@@ -1903,10 +1916,9 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
19031916
}
19041917

19051918
static size_t LowerBoundAllocatedByteSize(size_t size) {
1906-
size_t capacity = container_internal::NormalizeCapacity(
1907-
std::ceil(size / Set::kMaxLoadFactor));
1919+
size_t capacity = GrowthToLowerboundCapacity(size);
19081920
if (capacity == 0) return 0;
1909-
auto layout = Set::MakeLayout(capacity);
1921+
auto layout = Set::MakeLayout(NormalizeCapacity(capacity));
19101922
size_t m = layout.AllocSize();
19111923
size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr));
19121924
if (per_slot != ~size_t{}) {

absl/container/internal/raw_hash_set_test.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ namespace {
4848

4949
using ::testing::DoubleNear;
5050
using ::testing::ElementsAre;
51+
using ::testing::Ge;
52+
using ::testing::Lt;
5153
using ::testing::Optional;
5254
using ::testing::Pair;
5355
using ::testing::UnorderedElementsAre;
@@ -62,6 +64,33 @@ TEST(Util, NormalizeCapacity) {
6264
EXPECT_EQ(kMinCapacity * 2 + 1, NormalizeCapacity(kMinCapacity + 2));
6365
}
6466

67+
TEST(Util, GrowthAndCapacity) {
68+
// Verify that GrowthToCapacity gives the minimum capacity that has enough
69+
// growth.
70+
for (size_t growth = 0; growth < 10000; ++growth) {
71+
SCOPED_TRACE(growth);
72+
size_t capacity = NormalizeCapacity(GrowthToLowerboundCapacity(growth));
73+
// The capacity is large enough for `growth`
74+
EXPECT_THAT(CapacityToGrowth(capacity), Ge(growth));
75+
if (growth < Group::kWidth - 1) {
76+
// Fits in one group, that is the minimum capacity.
77+
EXPECT_EQ(capacity, Group::kWidth - 1);
78+
} else {
79+
// There is no smaller capacity that works.
80+
EXPECT_THAT(CapacityToGrowth(capacity / 2), Lt(growth));
81+
}
82+
}
83+
84+
for (size_t capacity = Group::kWidth - 1; capacity < 10000;
85+
capacity = 2 * capacity + 1) {
86+
SCOPED_TRACE(capacity);
87+
size_t growth = CapacityToGrowth(capacity);
88+
EXPECT_THAT(growth, Lt(capacity));
89+
EXPECT_LE(GrowthToLowerboundCapacity(growth), capacity);
90+
EXPECT_EQ(NormalizeCapacity(GrowthToLowerboundCapacity(growth)), capacity);
91+
}
92+
}
93+
6594
TEST(Util, probe_seq) {
6695
probe_seq<16> seq(0, 127);
6796
auto gen = [&]() {

0 commit comments

Comments
 (0)