From dd69c96310efc30931d07a1f9a988f415947bb7e Mon Sep 17 00:00:00 2001 From: Omer Mor Date: Tue, 6 May 2025 07:36:06 -0700 Subject: [PATCH 001/107] Extract `WideToUtf8` helper to `utf8.h`. This change allows to reuse the conversion of `wchar_t` into UTF-8 from other code. It also adds tests that covers the conversion. PiperOrigin-RevId: 755365598 Change-Id: I0f1c0b7949dec3c9874be99f01540748436532ce --- absl/strings/BUILD.bazel | 2 + absl/strings/CMakeLists.txt | 2 + absl/strings/internal/str_format/arg.cc | 63 +------- absl/strings/internal/utf8.cc | 46 ++++++ absl/strings/internal/utf8.h | 14 ++ absl/strings/internal/utf8_test.cc | 186 +++++++++++++++++++++++- 6 files changed, 253 insertions(+), 60 deletions(-) diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 49562f72c61..bb152acc325 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -329,6 +329,7 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":internal", + ":string_view", "//absl/base:core_headers", "@googletest//:gtest", "@googletest//:gtest_main", @@ -1316,6 +1317,7 @@ cc_library( linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:private"], deps = [ + ":internal", ":strings", "//absl/base:config", "//absl/base:core_headers", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index ee738605520..547ef268690 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -243,6 +243,7 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS + absl::string_view absl::strings_internal absl::base absl::core_headers @@ -518,6 +519,7 @@ absl_cc_library( absl::utility absl::int128 absl::span + absl::strings_internal ) absl_cc_test( diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index eeb2108154f..103c85d16ea 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -34,6 +34,7 @@ #include "absl/numeric/int128.h" #include 
"absl/strings/internal/str_format/extension.h" #include "absl/strings/internal/str_format/float_conversion.h" +#include "absl/strings/internal/utf8.h" #include "absl/strings/numbers.h" #include "absl/strings/string_view.h" @@ -311,68 +312,16 @@ inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, conv.has_left_flag()); } -struct ShiftState { - bool saw_high_surrogate = false; - uint8_t bits = 0; -}; - -// Converts `v` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is -// assumed to have enough space for the output. `s` is used to carry state -// between successive calls with a UTF-16 surrogate pair. Returns the number of -// chars written, or `static_cast(-1)` on failure. -// -// This is basically std::wcrtomb(), but always outputting UTF-8 instead of -// respecting the current locale. -inline size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) { - const auto v = static_cast(wc); - if (v < 0x80) { - *buf = static_cast(v); - return 1; - } else if (v < 0x800) { - *buf++ = static_cast(0xc0 | (v >> 6)); - *buf = static_cast(0x80 | (v & 0x3f)); - return 2; - } else if (v < 0xd800 || (v - 0xe000) < 0x2000) { - *buf++ = static_cast(0xe0 | (v >> 12)); - *buf++ = static_cast(0x80 | ((v >> 6) & 0x3f)); - *buf = static_cast(0x80 | (v & 0x3f)); - return 3; - } else if ((v - 0x10000) < 0x100000) { - *buf++ = static_cast(0xf0 | (v >> 18)); - *buf++ = static_cast(0x80 | ((v >> 12) & 0x3f)); - *buf++ = static_cast(0x80 | ((v >> 6) & 0x3f)); - *buf = static_cast(0x80 | (v & 0x3f)); - return 4; - } else if (v < 0xdc00) { - s.saw_high_surrogate = true; - s.bits = static_cast(v & 0x3); - const uint8_t high_bits = ((v >> 6) & 0xf) + 1; - *buf++ = static_cast(0xf0 | (high_bits >> 2)); - *buf = - static_cast(0x80 | static_cast((high_bits & 0x3) << 4) | - static_cast((v >> 2) & 0xf)); - return 2; - } else if (v < 0xe000 && s.saw_high_surrogate) { - *buf++ = static_cast(0x80 | static_cast(s.bits << 4) | - static_cast((v >> 6) & 0xf)); - *buf = 
static_cast(0x80 | (v & 0x3f)); - s.saw_high_surrogate = false; - s.bits = 0; - return 2; - } else { - return static_cast(-1); - } -} - inline bool ConvertStringArg(const wchar_t *v, size_t len, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { FixedArray mb(len * 4); - ShiftState s; + strings_internal::ShiftState s; size_t chars_written = 0; for (size_t i = 0; i < len; ++i) { - const size_t chars = WideToUtf8(v[i], &mb[chars_written], s); + const size_t chars = + strings_internal::WideToUtf8(v[i], &mb[chars_written], s); if (chars == static_cast(-1)) { return false; } chars_written += chars; } @@ -382,8 +331,8 @@ inline bool ConvertStringArg(const wchar_t *v, bool ConvertWCharTImpl(wchar_t v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { char mb[4]; - ShiftState s; - const size_t chars_written = WideToUtf8(v, mb, s); + strings_internal::ShiftState s; + const size_t chars_written = strings_internal::WideToUtf8(v, mb, s); return chars_written != static_cast(-1) && !s.saw_high_surrogate && ConvertStringArg(string_view(mb, chars_written), conv, sink); } diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc index 7ecb93dfbe7..4370c7c73a4 100644 --- a/absl/strings/internal/utf8.cc +++ b/absl/strings/internal/utf8.cc @@ -16,6 +16,11 @@ #include "absl/strings/internal/utf8.h" +#include +#include + +#include "absl/base/config.h" + namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { @@ -48,6 +53,47 @@ size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { } } +size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) { + const auto v = static_cast(wc); + if (v < 0x80) { + *buf = static_cast(v); + return 1; + } else if (v < 0x800) { + *buf++ = static_cast(0xc0 | (v >> 6)); + *buf = static_cast(0x80 | (v & 0x3f)); + return 2; + } else if (v < 0xd800 || (v - 0xe000) < 0x2000) { + *buf++ = static_cast(0xe0 | (v >> 12)); + *buf++ = static_cast(0x80 | ((v >> 6) & 0x3f)); + *buf = static_cast(0x80 | (v & 0x3f)); + 
return 3; + } else if ((v - 0x10000) < 0x100000) { + *buf++ = static_cast(0xf0 | (v >> 18)); + *buf++ = static_cast(0x80 | ((v >> 12) & 0x3f)); + *buf++ = static_cast(0x80 | ((v >> 6) & 0x3f)); + *buf = static_cast(0x80 | (v & 0x3f)); + return 4; + } else if (v < 0xdc00) { + s.saw_high_surrogate = true; + s.bits = static_cast(v & 0x3); + const uint8_t high_bits = ((v >> 6) & 0xf) + 1; + *buf++ = static_cast(0xf0 | (high_bits >> 2)); + *buf = + static_cast(0x80 | static_cast((high_bits & 0x3) << 4) | + static_cast((v >> 2) & 0xf)); + return 2; + } else if (v < 0xe000 && s.saw_high_surrogate) { + *buf++ = static_cast(0x80 | static_cast(s.bits << 4) | + static_cast((v >> 6) & 0xf)); + *buf = static_cast(0x80 | (v & 0x3f)); + s.saw_high_surrogate = false; + s.bits = 0; + return 2; + } else { + return static_cast(-1); + } +} + } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h index 32fb1093bea..f240408db44 100644 --- a/absl/strings/internal/utf8.h +++ b/absl/strings/internal/utf8.h @@ -43,6 +43,20 @@ namespace strings_internal { enum { kMaxEncodedUTF8Size = 4 }; size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); +struct ShiftState { + bool saw_high_surrogate = false; + uint8_t bits = 0; +}; + +// Converts `wc` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is +// assumed to have enough space for the output. `s` is used to carry state +// between successive calls with a UTF-16 surrogate pair. Returns the number of +// chars written, or `static_cast(-1)` on failure. +// +// This is basically std::wcrtomb(), but always outputting UTF-8 instead of +// respecting the current locale. 
+size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s); + } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc index 88dd5036e3d..62322dd1903 100644 --- a/absl/strings/internal/utf8_test.cc +++ b/absl/strings/internal/utf8_test.cc @@ -14,14 +14,29 @@ #include "absl/strings/internal/utf8.h" +#include #include +#include +#include +#include #include +#include +#include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/port.h" +#include "absl/strings/string_view.h" namespace { +using ::absl::strings_internal::kMaxEncodedUTF8Size; +using ::absl::strings_internal::ShiftState; +using ::absl::strings_internal::WideToUtf8; +using ::testing::StartsWith; +using ::testing::TestParamInfo; +using ::testing::TestWithParam; +using ::testing::ValuesIn; + #if !defined(__cpp_char8_t) #if defined(__clang__) #pragma clang diagnostic push @@ -33,12 +48,12 @@ TEST(EncodeUTF8Char, BasicFunction) { {0x00010000, u8"\U00010000"}, {0x0000FFFF, u8"\U0000FFFF"}, {0x0010FFFD, u8"\U0010FFFD"}}; - for (auto &test : tests) { + for (auto& test : tests) { char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}; char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'}; - char *buf0_written = + char* buf0_written = &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)]; - char *buf1_written = + char* buf1_written = &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)]; int apparent_length = 7; while (buf0[apparent_length - 1] == '\x00' && @@ -63,4 +78,169 @@ TEST(EncodeUTF8Char, BasicFunction) { #endif #endif // !defined(__cpp_char8_t) +struct WideToUtf8TestCase { + std::string description; + wchar_t input; + std::string expected_utf8_str; + size_t expected_bytes_written; + ShiftState initial_state = {false, 0}; + ShiftState expected_state = {false, 0}; +}; + +std::vector GetWideToUtf8TestCases() { + constexpr size_t kError = 
static_cast(-1); + std::vector cases = { + {"ASCII_A", L'A', "A", 1}, + {"NullChar", L'\0', std::string("\0", 1), 1}, + {"ASCII_Max_7F", L'\x7F', "\x7F", 1}, + + {"TwoByte_Min_80", L'\u0080', "\xC2\x80", 2}, + {"PoundSign_A3", L'\u00A3', "\xC2\xA3", 2}, + {"TwoByte_Max_7FF", L'\u07FF', "\xDF\xBF", 2}, + + {"ThreeByte_Min_800", L'\u0800', "\xE0\xA0\x80", 3}, + {"EuroSign_20AC", L'\u20AC', "\xE2\x82\xAC", 3}, + {"BMP_MaxBeforeSurrogates_D7FF", L'\uD7FF', "\xED\x9F\xBF", 3}, + {"BMP_FFFF", L'\uFFFF', "\xEF\xBF\xBF", 3}, + + {"IsolatedHighSurr_D800", L'\xD800', "\xF0\x90", 2, {true, 0}, {true, 0}}, + {"IsolatedHighSurr_DBFF", L'\xDBFF', "\xF4\x8F", 2, {true, 3}, {true, 3}}, + + {"LowSurr_DC00_after_HighD800", L'\xDC00', "\x80\x80", 2, {true, 0}, {}}, + {"LowSurr_DFFD_after_HighDBFF", L'\xDFFD', "\xBF\xBD", 2, {true, 3}, {}}, + {"LowSurr_DC00_with_InitialState_saw_high_bits_1", + L'\xDC00', + "\x90\x80", + 2, + {true, 1}, + {}}, + + // Final state = initial on error. + {"Error_IsolatedLowSurr_DC00_NoPriorHigh", L'\xDC00', "", kError, {}, {}}, + {"Error_IsolatedLowSurr_DFFF_NoPriorHigh", L'\xDFFF', "", kError, {}, {}}, + +#if (defined(WCHAR_MAX) && WCHAR_MAX > 0xFFFF) + {"DirectSupplementaryChars_U10000", static_cast(0x10000), + "\xF0\x90\x80\x80", 4}, + {"DirectSupplementaryChars_U10FFFD", static_cast(0x10FFFD), + "\xF4\x8F\xBF\xBD", 4}, +#endif + }; + + wchar_t minus_one = static_cast(-1); + if constexpr (sizeof(wchar_t) == 2) { + cases.push_back({"WChar_MinusOne_as_FFFF", minus_one, "\xEF\xBF\xBF", 3}); + } else { + cases.push_back( + {"Error_WChar_MinusOne_as_FFFFFFFF", minus_one, "", kError, {}, {}}); + } + + if constexpr (sizeof(wchar_t) >= 4) { +#ifdef WCHAR_MAX + if (static_cast(WCHAR_MAX) >= 0x110000UL) { + cases.push_back({"Error_OutOfRange_110000", + static_cast(0x110000UL), + "", + kError, + {}, + {}}); + } +#else + cases.push_back({"Error_OutOfRange_110000_fallback", + static_cast(0x110000UL), + "", + kError, + {}, + {}}); +#endif + } + return cases; +} + 
+class WideToUtf8ParamTest : public TestWithParam {}; + +TEST_P(WideToUtf8ParamTest, SingleCharConversion) { + const auto& test_case = GetParam(); + ShiftState state = test_case.initial_state; + constexpr char kFillChar = '\xAB'; + std::string buffer(32, kFillChar); + + size_t bytes_written = WideToUtf8(test_case.input, buffer.data(), state); + + EXPECT_EQ(bytes_written, test_case.expected_bytes_written); + EXPECT_THAT(buffer, StartsWith(test_case.expected_utf8_str)); + + // The remaining bytes should be unchanged. + ASSERT_LT(test_case.expected_utf8_str.length(), buffer.size()); + EXPECT_EQ(buffer[test_case.expected_utf8_str.length()], kFillChar); + + EXPECT_EQ(state.saw_high_surrogate, + test_case.expected_state.saw_high_surrogate); + EXPECT_EQ(state.bits, test_case.expected_state.bits); +} + +INSTANTIATE_TEST_SUITE_P(WideCharToUtf8Conversion, WideToUtf8ParamTest, + ValuesIn(GetWideToUtf8TestCases()), + [](auto info) { return info.param.description; }); + +// Comprehensive test string for validating wchar_t to UTF-8 conversion. +// This string is designed to cover a variety of Unicode character types and +// sequences: +// 1. Basic ASCII characters (within names, numbers, and spacing). +// 2. Common 2-byte UTF-8 sequences: +// - Accented Latin characters (e.g., 'á' in "Holá"). +// - Hebrew text with combining vowel points (e.g., "שָׁלוֹם"). +// 3. Common 3-byte UTF-8 sequences: +// - Currency symbols (e.g., '€'). +// - CJK characters (e.g., "你好", "中"). +// - Components of complex emojis like the Zero Width Joiner (ZWJ) and +// Heart symbol. +// 4. Various 4-byte UTF-8 sequences (representing Supplementary Plane +// characters): +// - An emoji with a skin tone modifier ("👍🏻"). +// - A flag emoji composed of regional indicators ("🇺🇸"). +// - A complex ZWJ emoji sequence ("👩‍❤️‍💋‍👨") combining +// SP characters (👩, 💋, 👨) with BMP characters (ZWJ and ❤️). 
+// - These are critical for testing the correct handling of surrogate pairs +// when wchar_t is 2 bytes (e.g., on Windows). +// The goal is to ensure accurate conversion across a diverse set of +// characters. +// +// clang-format off +#define WIDE_STRING_LITERAL L"Holá €1 你好 שָׁלוֹם 👍🏻🇺🇸👩‍❤️‍💋‍👨 中" +#define UTF8_STRING_LITERAL u8"Holá €1 你好 שָׁלוֹם 👍🏻🇺🇸👩‍❤️‍💋‍👨 中" +// clang-format on + +absl::string_view GetUtf8TestString() { + // `u8""` forces UTF-8 encoding; MSVC will default to e.g. CP1252 (and warn) + // without it. However, the resulting character type differs between pre-C++20 + // (`char`) and C++20 (`char8_t`). So deduce the right character type for all + // C++ versions, init it with UTF-8, then `memcpy()` to get the result as a + // `char*` + static absl::string_view kUtf8TestString = [] { + using ConstChar8T = std::remove_reference_t; + constexpr ConstChar8T kOutputUtf8[] = UTF8_STRING_LITERAL; + static char output[sizeof kOutputUtf8]; + std::memcpy(output, kOutputUtf8, sizeof kOutputUtf8); + return output; + }(); + + return kUtf8TestString; +} + +TEST(WideToUtf8, FullString) { + std::string buffer(kMaxEncodedUTF8Size * sizeof(WIDE_STRING_LITERAL), '\0'); + char* buffer_ptr = buffer.data(); + + ShiftState state; + for (const wchar_t wc : WIDE_STRING_LITERAL) { + buffer_ptr += WideToUtf8(wc, buffer_ptr, state); + } + + EXPECT_THAT(buffer, StartsWith(GetUtf8TestString())); +} + +#undef WIDE_STRING_LITERAL +#undef UTF8_STRING_LITERAL + } // namespace From 2bbec17a3f20da4d908e5627aa72b46f48e064ac Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Tue, 6 May 2025 08:42:12 -0700 Subject: [PATCH 002/107] Remove the explicit from the constructor to a test allocator in hash_policy_testing.h. 
This is rejected by Clang when using the libstdc++ that ships with GCC15 PiperOrigin-RevId: 755387792 Change-Id: I1b68d80b9603289da03db44606efcf02b8c8f379 --- absl/container/internal/hash_policy_testing.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/absl/container/internal/hash_policy_testing.h b/absl/container/internal/hash_policy_testing.h index 66bb12ec457..e9f57579ed7 100644 --- a/absl/container/internal/hash_policy_testing.h +++ b/absl/container/internal/hash_policy_testing.h @@ -119,7 +119,11 @@ struct Alloc : std::allocator { using propagate_on_container_swap = std::true_type; // Using old paradigm for this to ensure compatibility. - explicit Alloc(size_t id = 0) : id_(id) {} + // + // NOTE: As of 2025-05, this constructor cannot be explicit in order to work + // with the libstdc++ that ships with GCC15. + // NOLINTNEXTLINE(google-explicit-constructor) + Alloc(size_t id = 0) : id_(id) {} Alloc(const Alloc&) = default; Alloc& operator=(const Alloc&) = default; From c42f038fd262e2038b7dc1da7285dafc14091d98 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 6 May 2025 10:49:38 -0700 Subject: [PATCH 003/107] Move PrepareInsertCommon from header file to cc file. 
PiperOrigin-RevId: 755434777 Change-Id: Ic84e3dc3014b19f21ce98e8dae2529236ce4bf5e --- absl/container/internal/raw_hash_set.cc | 5 +++++ absl/container/internal/raw_hash_set.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index f19e87b3fa5..9d3b5992011 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -295,6 +295,11 @@ size_t FindFirstFullSlot(size_t start, size_t end, const ctrl_t* ctrl) { ABSL_UNREACHABLE(); } +void PrepareInsertCommon(CommonFields& common) { + common.increment_size(); + common.maybe_increment_generation_on_insert(); +} + size_t DropDeletesWithoutResizeAndPrepareInsert(CommonFields& common, const PolicyFunctions& policy, size_t new_hash) { diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 512c94624bc..3bc86d19dec 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1827,11 +1827,6 @@ void ResizeAllocatedTableWithSeedChange(CommonFields& common, const PolicyFunctions& policy, size_t new_capacity); -inline void PrepareInsertCommon(CommonFields& common) { - common.increment_size(); - common.maybe_increment_generation_on_insert(); -} - // ClearBackingArray clears the backing array, either modifying it in place, // or creating a new one based on the value of "reuse". // REQUIRES: c.capacity > 0 From 774d41004a6de8e13b2abf68cdf896a8808b3d67 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Tue, 6 May 2025 12:33:08 -0700 Subject: [PATCH 004/107] Use Bazel vendor mode to cache external dependencies on Windows and macOS Also upgrade to Bazel 8.2.1 since all platforms will need to use the same version of Bazel to get the most cache hits. Different Bazel versions have different dependencies. 
PiperOrigin-RevId: 755477047 Change-Id: Ib8838c6ef1e3b55841005d877346ed8afceca15a --- ci/macos_xcode_bazel.sh | 11 +++++------ ci/windows_clangcl_bazel.bat | 14 +++++++++++--- ci/windows_msvc_bazel.bat | 14 +++++++++++--- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/ci/macos_xcode_bazel.sh b/ci/macos_xcode_bazel.sh index 51ffde8d62a..b05cfac2267 100755 --- a/ci/macos_xcode_bazel.sh +++ b/ci/macos_xcode_bazel.sh @@ -27,7 +27,7 @@ if [[ -z ${ABSEIL_ROOT:-} ]]; then fi # If we are running on Kokoro, check for a versioned Bazel binary. -KOKORO_GFILE_BAZEL_BIN="bazel-8.0.0-darwin-x86_64" +KOKORO_GFILE_BAZEL_BIN="bazel-8.2.1-darwin-x86_64" if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f ${KOKORO_GFILE_DIR}/${KOKORO_GFILE_BAZEL_BIN} ]]; then BAZEL_BIN="${KOKORO_GFILE_DIR}/${KOKORO_GFILE_BAZEL_BIN}" chmod +x ${BAZEL_BIN} @@ -35,11 +35,10 @@ else BAZEL_BIN="bazel" fi -# Avoid depending on external sites like GitHub by checking --distdir for -# external dependencies first. -# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - BAZEL_EXTRA_ARGS="--distdir=${KOKORO_GFILE_DIR}/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + tar -xf "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" -C "${TMP}/" + BAZEL_EXTRA_ARGS="--vendor_dir=\"${TMP}/abseil-cpp_vendor\" ${BAZEL_EXTRA_ARGS:-}" fi # Print the compiler and Bazel versions. diff --git a/ci/windows_clangcl_bazel.bat b/ci/windows_clangcl_bazel.bat index f9512ef0bce..26fd5af863e 100755 --- a/ci/windows_clangcl_bazel.bat +++ b/ci/windows_clangcl_bazel.bat @@ -21,6 +21,14 @@ SET BAZEL_LLVM=C:\Program Files\LLVM CD %~dp0\.. if %errorlevel% neq 0 EXIT /B 1 +:: Use Bazel Vendor mode to reduce reliance on external dependencies. 
+IF EXIST "%KOKORO_GFILE_DIR%\distdir\abseil-cpp_vendor.tar.gz" ( + tar --force-local -xf "%KOKORO_GFILE_DIR%\distdir\abseil-cpp_vendor.tar.gz" -C c:\ + SET VENDOR_FLAG=--vendor_dir=c:\abseil-cpp_vendor +) ELSE ( + SET VENDOR_FLAG= +) + :: Set the standard version, [c++17|c++20|c++latest] :: https://msdn.microsoft.com/en-us/library/mt490614.aspx :: The default is c++17 if not set on command line. @@ -39,7 +47,7 @@ IF NOT "%ALTERNATE_OPTIONS%"=="" copy %ALTERNATE_OPTIONS% absl\base\options.h :: /google/data/rw/teams/absl/kokoro/windows. :: :: TODO(absl-team): Remove -Wno-microsoft-cast -%KOKORO_GFILE_DIR%\bazel-8.0.0-windows-x86_64.exe ^ +%KOKORO_GFILE_DIR%\bazel-8.2.1-windows-x86_64.exe ^ test ... ^ --compilation_mode=%COMPILATION_MODE% ^ --compiler=clang-cl ^ @@ -47,7 +55,6 @@ IF NOT "%ALTERNATE_OPTIONS%"=="" copy %ALTERNATE_OPTIONS% absl\base\options.h --copt=-Wno-microsoft-cast ^ --cxxopt=/std:%STD% ^ --define=absl=1 ^ - --distdir=%KOKORO_GFILE_DIR%\distdir ^ --enable_bzlmod=true ^ --extra_execution_platforms=//:x64_windows-clang-cl ^ --extra_toolchains=@local_config_cc//:cc-toolchain-x64_windows-clang-cl ^ @@ -55,7 +62,8 @@ IF NOT "%ALTERNATE_OPTIONS%"=="" copy %ALTERNATE_OPTIONS% absl\base\options.h --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" ^ --test_env=TZDIR="%CD%\absl\time\internal\cctz\testdata\zoneinfo" ^ --test_output=errors ^ - --test_tag_filters=-benchmark + --test_tag_filters=-benchmark ^ + %VENDOR_FLAG% if %errorlevel% neq 0 EXIT /B 1 EXIT /B 0 diff --git a/ci/windows_msvc_bazel.bat b/ci/windows_msvc_bazel.bat index e0cd0169bed..bbb57b41b25 100755 --- a/ci/windows_msvc_bazel.bat +++ b/ci/windows_msvc_bazel.bat @@ -18,6 +18,14 @@ SETLOCAL ENABLEDELAYEDEXPANSION CD %~dp0\.. if %errorlevel% neq 0 EXIT /B 1 +:: Use Bazel Vendor mode to reduce reliance on external dependencies. 
+IF EXIST "%KOKORO_GFILE_DIR%\distdir\abseil-cpp_vendor.tar.gz" ( + tar --force-local -xf "%KOKORO_GFILE_DIR%\distdir\abseil-cpp_vendor.tar.gz" -C c:\ + SET VENDOR_FLAG=--vendor_dir=c:\abseil-cpp_vendor +) ELSE ( + SET VENDOR_FLAG= +) + :: Set the standard version, [c++17|c++latest] :: https://msdn.microsoft.com/en-us/library/mt490614.aspx :: The default is c++17 if not set on command line. @@ -34,19 +42,19 @@ IF NOT "%ALTERNATE_OPTIONS%"=="" copy %ALTERNATE_OPTIONS% absl\base\options.h :: To upgrade Bazel, first download a new binary from :: https://github.com/bazelbuild/bazel/releases and copy it to :: /google/data/rw/teams/absl/kokoro/windows. -%KOKORO_GFILE_DIR%\bazel-8.0.0-windows-x86_64.exe ^ +"%KOKORO_GFILE_DIR%\bazel-8.2.1-windows-x86_64.exe" ^ test ... ^ --compilation_mode=%COMPILATION_MODE% ^ --copt=/WX ^ --copt=/std:%STD% ^ --define=absl=1 ^ - --distdir=%KOKORO_GFILE_DIR%\distdir ^ --enable_bzlmod=true ^ --keep_going ^ --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" ^ --test_env=TZDIR="%CD%\absl\time\internal\cctz\testdata\zoneinfo" ^ --test_output=errors ^ - --test_tag_filters=-benchmark + --test_tag_filters=-benchmark ^ + %VENDOR_FLAG% if %errorlevel% neq 0 EXIT /B 1 EXIT /B 0 From e3a2008867c5dc5c0f4c7a4e64fb568df70f23be Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 6 May 2025 13:13:11 -0700 Subject: [PATCH 005/107] Add __restrict for uses of PolicyFunctions. This lets the compiler know that PolicyFunctions never aliases other variables so e.g. when we write to control bytes, we don't need to reload PolicyFunctions members that we've previously accessed. 
PiperOrigin-RevId: 755493766 Change-Id: Ic7729a9726da00bcaabd064ffd046a6260dd6480 --- absl/container/internal/raw_hash_set.cc | 135 ++++++++++++------------ 1 file changed, 70 insertions(+), 65 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 9d3b5992011..339e662d012 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -300,9 +300,9 @@ void PrepareInsertCommon(CommonFields& common) { common.maybe_increment_generation_on_insert(); } -size_t DropDeletesWithoutResizeAndPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - size_t new_hash) { +size_t DropDeletesWithoutResizeAndPrepareInsert( + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_hash) { void* set = &common; void* slot_array = common.slot_array(); const size_t capacity = common.capacity(); @@ -404,7 +404,7 @@ size_t DropDeletesWithoutResizeAndPrepareInsert(CommonFields& common, PrepareInsertCommon(common); ResetGrowthLeft(common); FindInfo find_info = find_first_non_full(common, new_hash); - SetCtrlInLargeTable(common, find_info.offset, H2(new_hash), policy.slot_size); + SetCtrlInLargeTable(common, find_info.offset, H2(new_hash), slot_size); common.infoz().RecordInsert(new_hash, find_info.probe_length); common.infoz().RecordRehash(total_probe_length); return find_info.offset; @@ -560,8 +560,9 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { SetCtrlInLargeTable(c, index, ctrl_t::kDeleted, slot_size); } -void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy, - void* alloc, bool reuse, bool soo_enabled) { +void ClearBackingArray(CommonFields& c, + const PolicyFunctions& __restrict policy, void* alloc, + bool reuse, bool soo_enabled) { if (reuse) { c.set_size_to_zero(); ABSL_SWISSTABLE_ASSERT(!soo_enabled || c.capacity() > SooCapacity()); @@ -592,10 +593,9 @@ enum class ResizeNonSooMode { // This function is used for 
reserving or rehashing non-empty tables. // This use case is rare so the function is type erased. // Returns the total probe length. -size_t FindNewPositionsAndTransferSlots(CommonFields& common, - const PolicyFunctions& policy, - ctrl_t* old_ctrl, void* old_slots, - size_t old_capacity) { +size_t FindNewPositionsAndTransferSlots( + CommonFields& common, const PolicyFunctions& __restrict policy, + ctrl_t* old_ctrl, void* old_slots, size_t old_capacity) { void* new_slots = common.slot_array(); const void* hash_fn = policy.hash_fn(common); const size_t slot_size = policy.slot_size; @@ -619,7 +619,8 @@ size_t FindNewPositionsAndTransferSlots(CommonFields& common, } template -void ResizeNonSooImpl(CommonFields& common, const PolicyFunctions& policy, +void ResizeNonSooImpl(CommonFields& common, + const PolicyFunctions& __restrict policy, size_t new_capacity, HashtablezInfoHandle infoz) { ABSL_SWISSTABLE_ASSERT(IsValidCapacity(new_capacity)); ABSL_SWISSTABLE_ASSERT(new_capacity > policy.soo_capacity()); @@ -672,7 +673,7 @@ void ResizeNonSooImpl(CommonFields& common, const PolicyFunctions& policy, } void ResizeEmptyNonAllocatedTableImpl(CommonFields& common, - const PolicyFunctions& policy, + const PolicyFunctions& __restrict policy, size_t new_capacity, bool force_infoz) { ABSL_SWISSTABLE_ASSERT(IsValidCapacity(new_capacity)); ABSL_SWISSTABLE_ASSERT(new_capacity > policy.soo_capacity()); @@ -695,10 +696,9 @@ void ResizeEmptyNonAllocatedTableImpl(CommonFields& common, // After transferring the slot, sets control and slots in CommonFields. // It is rare to resize an SOO table with one element to a large size. // Requires: `c` contains SOO data. 
-void InsertOldSooSlotAndInitializeControlBytes(CommonFields& c, - const PolicyFunctions& policy, - size_t hash, ctrl_t* new_ctrl, - void* new_slots) { +void InsertOldSooSlotAndInitializeControlBytes( + CommonFields& c, const PolicyFunctions& __restrict policy, size_t hash, + ctrl_t* new_ctrl, void* new_slots) { ABSL_SWISSTABLE_ASSERT(c.size() == policy.soo_capacity()); ABSL_SWISSTABLE_ASSERT(policy.soo_enabled); size_t new_capacity = c.capacity(); @@ -733,7 +733,8 @@ void AssertFullSoo([[maybe_unused]] CommonFields& common, ABSL_SWISSTABLE_ASSERT(common.size() == policy.soo_capacity()); } -void ResizeFullSooTable(CommonFields& common, const PolicyFunctions& policy, +void ResizeFullSooTable(CommonFields& common, + const PolicyFunctions& __restrict policy, size_t new_capacity, ResizeFullSooTableSamplingMode sampling_mode) { AssertFullSoo(common, policy); @@ -917,8 +918,8 @@ constexpr size_t kProbedElementsBufferSize = 512; // Returns the total probe length. template ABSL_ATTRIBUTE_NOINLINE size_t DecodeAndInsertImpl( - CommonFields& c, const PolicyFunctions& policy, const ProbedItem* start, - const ProbedItem* end, void* old_slots) { + CommonFields& c, const PolicyFunctions& __restrict policy, + const ProbedItem* start, const ProbedItem* end, void* old_slots) { const size_t new_capacity = c.capacity(); void* new_slots = c.slot_array(); @@ -954,9 +955,9 @@ constexpr size_t kNoMarkedElementsSentinel = ~size_t{}; // We marked them in control bytes as kSentinel. // Hash recomputation and full probing is done here. // This use case should be extremely rare. 
-ABSL_ATTRIBUTE_NOINLINE size_t -ProcessProbedMarkedElements(CommonFields& c, const PolicyFunctions& policy, - ctrl_t* old_ctrl, void* old_slots, size_t start) { +ABSL_ATTRIBUTE_NOINLINE size_t ProcessProbedMarkedElements( + CommonFields& c, const PolicyFunctions& __restrict policy, ctrl_t* old_ctrl, + void* old_slots, size_t start) { size_t old_capacity = PreviousCapacity(c.capacity()); const size_t slot_size = policy.slot_size; void* new_slots = c.slot_array(); @@ -1034,7 +1035,7 @@ class ProbedItemEncoder { // Finds new position for each element and transfers it to the new slots. // Returns the total probe length. size_t DecodeAndInsertToTable(CommonFields& common, - const PolicyFunctions& policy, + const PolicyFunctions& __restrict policy, void* old_slots) const { if (pos_ == buffer_) { return 0; @@ -1108,7 +1109,7 @@ class ProbedItemEncoder { // Finds new position for each element and transfers it to the new slots. // Returns the total probe length. ABSL_ATTRIBUTE_NOINLINE size_t DecodeAndInsertToTableOverflow( - CommonFields& common, const PolicyFunctions& policy, + CommonFields& common, const PolicyFunctions& __restrict policy, void* old_slots) const { ABSL_SWISSTABLE_ASSERT(local_buffer_full_ && "must not be called when local buffer is not full"); @@ -1141,7 +1142,8 @@ class ProbedItemEncoder { // Different encoder is used depending on the capacity of the table. // Returns total probe length. 
template -size_t GrowToNextCapacity(CommonFields& common, const PolicyFunctions& policy, +size_t GrowToNextCapacity(CommonFields& common, + const PolicyFunctions& __restrict policy, ctrl_t* old_ctrl, void* old_slots) { using ProbedItem = typename Encoder::ProbedItem; ABSL_SWISSTABLE_ASSERT(common.capacity() <= ProbedItem::kMaxNewCapacity); @@ -1159,10 +1161,9 @@ size_t GrowToNextCapacity(CommonFields& common, const PolicyFunctions& policy, // Grows to next capacity for relatively small tables so that even if all // elements are probed, we don't need to overflow the local buffer. // Returns total probe length. -size_t GrowToNextCapacityThatFitsInLocalBuffer(CommonFields& common, - const PolicyFunctions& policy, - ctrl_t* old_ctrl, - void* old_slots) { +size_t GrowToNextCapacityThatFitsInLocalBuffer( + CommonFields& common, const PolicyFunctions& __restrict policy, + ctrl_t* old_ctrl, void* old_slots) { ABSL_SWISSTABLE_ASSERT(common.capacity() <= kMaxLocalBufferNewCapacity); return GrowToNextCapacity< ProbedItemEncoder>( @@ -1172,20 +1173,20 @@ size_t GrowToNextCapacityThatFitsInLocalBuffer(CommonFields& common, // Grows to next capacity with different encodings. Returns total probe length. // These functions are useful to simplify profile analysis. 
size_t GrowToNextCapacity4BytesEncoder(CommonFields& common, - const PolicyFunctions& policy, + const PolicyFunctions& __restrict policy, ctrl_t* old_ctrl, void* old_slots) { return GrowToNextCapacity>( common, policy, old_ctrl, old_slots); } size_t GrowToNextCapacity8BytesEncoder(CommonFields& common, - const PolicyFunctions& policy, + const PolicyFunctions& __restrict policy, ctrl_t* old_ctrl, void* old_slots) { return GrowToNextCapacity>( common, policy, old_ctrl, old_slots); } -size_t GrowToNextCapacity16BytesEncoder(CommonFields& common, - const PolicyFunctions& policy, - ctrl_t* old_ctrl, void* old_slots) { +size_t GrowToNextCapacity16BytesEncoder( + CommonFields& common, const PolicyFunctions& __restrict policy, + ctrl_t* old_ctrl, void* old_slots) { return GrowToNextCapacity>( common, policy, old_ctrl, old_slots); } @@ -1193,10 +1194,9 @@ size_t GrowToNextCapacity16BytesEncoder(CommonFields& common, // Grows to next capacity for tables with relatively large capacity so that we // can't guarantee that all probed elements fit in the local buffer. Returns // total probe length. -size_t GrowToNextCapacityOverflowLocalBuffer(CommonFields& common, - const PolicyFunctions& policy, - ctrl_t* old_ctrl, - void* old_slots) { +size_t GrowToNextCapacityOverflowLocalBuffer( + CommonFields& common, const PolicyFunctions& __restrict policy, + ctrl_t* old_ctrl, void* old_slots) { const size_t new_capacity = common.capacity(); if (ABSL_PREDICT_TRUE(new_capacity <= ProbedItem4Bytes::kMaxNewCapacity)) { return GrowToNextCapacity4BytesEncoder(common, policy, old_ctrl, old_slots); @@ -1212,7 +1212,7 @@ size_t GrowToNextCapacityOverflowLocalBuffer(CommonFields& common, // capacity of the table. Returns total probe length. 
ABSL_ATTRIBUTE_NOINLINE size_t GrowToNextCapacityDispatch(CommonFields& common, - const PolicyFunctions& policy, + const PolicyFunctions& __restrict policy, ctrl_t* old_ctrl, void* old_slots) { const size_t new_capacity = common.capacity(); if (ABSL_PREDICT_TRUE(new_capacity <= kMaxLocalBufferNewCapacity)) { @@ -1226,9 +1226,9 @@ size_t GrowToNextCapacityDispatch(CommonFields& common, // Grows to next capacity and prepares insert for the given new_hash. // Returns the offset of the new element. -size_t GrowToNextCapacityAndPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - size_t new_hash) { +size_t GrowToNextCapacityAndPrepareInsert( + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_hash) { ABSL_SWISSTABLE_ASSERT(common.growth_left() == 0); const size_t old_capacity = common.capacity(); ABSL_SWISSTABLE_ASSERT(old_capacity == 0 || @@ -1326,9 +1326,9 @@ size_t GrowToNextCapacityAndPrepareInsert(CommonFields& common, // Called whenever the table needs to vacate empty slots either by removing // tombstones via rehash or growth to next capacity. ABSL_ATTRIBUTE_NOINLINE -size_t RehashOrGrowToNextCapacityAndPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - size_t new_hash) { +size_t RehashOrGrowToNextCapacityAndPrepareInsert( + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_hash) { const size_t cap = common.capacity(); ABSL_ASSUME(cap > 0); if (cap > Group::kWidth && @@ -1385,7 +1385,8 @@ size_t RehashOrGrowToNextCapacityAndPrepareInsert(CommonFields& common, // Slow path for PrepareInsertNonSoo that is called when the table has deleted // slots or need to be resized or rehashed. 
size_t PrepareInsertNonSooSlow(CommonFields& common, - const PolicyFunctions& policy, size_t hash) { + const PolicyFunctions& __restrict policy, + size_t hash) { const GrowthInfo growth_info = common.growth_info(); ABSL_SWISSTABLE_ASSERT(!growth_info.HasNoDeletedAndGrowthLeft()); if (ABSL_PREDICT_TRUE(growth_info.HasNoGrowthLeftAndNoDeleted())) { @@ -1407,7 +1408,6 @@ size_t PrepareInsertNonSooSlow(CommonFields& common, return target.offset; } - // Resizes empty non-allocated SOO table to NextCapacity(SooCapacity()), // forces the table to be sampled and prepares the insert. // SOO tables need to switch from SOO to heap in order to store the infoz. @@ -1416,7 +1416,8 @@ size_t PrepareInsertNonSooSlow(CommonFields& common, // 2. `c.empty()`. ABSL_ATTRIBUTE_NOINLINE size_t GrowEmptySooTableToNextCapacityForceSamplingAndPrepareInsert( - CommonFields& common, const PolicyFunctions& policy, size_t new_hash) { + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_hash) { ResizeEmptyNonAllocatedTableImpl(common, policy, NextCapacity(SooCapacity()), /*force_infoz=*/true); PrepareInsertCommon(common); @@ -1433,9 +1434,9 @@ GrowEmptySooTableToNextCapacityForceSamplingAndPrepareInsert( // 2. `c.empty()`. // 3. `new_size > policy.soo_capacity()`. // The table will be attempted to be sampled. -void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common, - const PolicyFunctions& policy, - size_t new_size) { +void ReserveEmptyNonAllocatedTableToFitNewSize( + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_size) { ValidateMaxSize(new_size, policy.slot_size); ABSL_ASSUME(new_size > 0); ResizeEmptyNonAllocatedTableImpl(common, policy, SizeToCapacity(new_size), @@ -1452,7 +1453,8 @@ void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common, // 1. `c.capacity() > policy.soo_capacity()` OR `!c.empty()`. // Reserving already allocated tables is considered to be a rare case. 
ABSL_ATTRIBUTE_NOINLINE void ReserveAllocatedTable( - CommonFields& common, const PolicyFunctions& policy, size_t new_size) { + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_size) { const size_t cap = common.capacity(); ValidateMaxSize(new_size, policy.slot_size); ABSL_ASSUME(new_size > 0); @@ -1479,15 +1481,16 @@ void* GetRefForEmptyClass(CommonFields& common) { return &common; } -void ResizeAllocatedTableWithSeedChange(CommonFields& common, - const PolicyFunctions& policy, - size_t new_capacity) { +void ResizeAllocatedTableWithSeedChange( + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_capacity) { ResizeNonSooImpl( common, policy, new_capacity, common.infoz()); } void ReserveEmptyNonAllocatedTableToFitBucketCount( - CommonFields& common, const PolicyFunctions& policy, size_t bucket_count) { + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t bucket_count) { size_t new_capacity = NormalizeCapacity(bucket_count); ValidateMaxSize(CapacityToGrowth(new_capacity), policy.slot_size); ResizeEmptyNonAllocatedTableImpl(common, policy, new_capacity, @@ -1496,10 +1499,9 @@ void ReserveEmptyNonAllocatedTableToFitBucketCount( // Resizes a full SOO table to the NextCapacity(SooCapacity()). template -size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - size_t new_hash, - ctrl_t soo_slot_ctrl) { +size_t GrowSooTableToNextCapacityAndPrepareInsert( + CommonFields& common, const PolicyFunctions& __restrict policy, + size_t new_hash, ctrl_t soo_slot_ctrl) { AssertSoo(common, policy); if (ABSL_PREDICT_FALSE(soo_slot_ctrl == ctrl_t::kEmpty)) { // The table is empty, it is only used for forced sampling of SOO tables. 
@@ -1571,14 +1573,15 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, } void GrowFullSooTableToNextCapacityForceSampling( - CommonFields& common, const PolicyFunctions& policy) { + CommonFields& common, const PolicyFunctions& __restrict policy) { AssertFullSoo(common, policy); ResizeFullSooTable( common, policy, NextCapacity(SooCapacity()), ResizeFullSooTableSamplingMode::kForceSampleNoResizeIfUnsampled); } -void Rehash(CommonFields& common, const PolicyFunctions& policy, size_t n) { +void Rehash(CommonFields& common, const PolicyFunctions& __restrict policy, + size_t n) { const size_t cap = common.capacity(); auto clear_backing_array = [&]() { @@ -1645,7 +1648,7 @@ void Rehash(CommonFields& common, const PolicyFunctions& policy, size_t n) { } } -void Copy(CommonFields& common, const PolicyFunctions& policy, +void Copy(CommonFields& common, const PolicyFunctions& __restrict policy, const CommonFields& other, absl::FunctionRef copy_fn) { const size_t size = other.size(); @@ -1721,7 +1724,8 @@ void Copy(CommonFields& common, const PolicyFunctions& policy, } void ReserveTableToFitNewSize(CommonFields& common, - const PolicyFunctions& policy, size_t new_size) { + const PolicyFunctions& __restrict policy, + size_t new_size) { common.reset_reserved_growth(new_size); common.set_reservation_size(new_size); ABSL_SWISSTABLE_ASSERT(new_size > policy.soo_capacity()); @@ -1741,7 +1745,8 @@ void ReserveTableToFitNewSize(CommonFields& common, ReserveAllocatedTable(common, policy, new_size); } -size_t PrepareInsertNonSoo(CommonFields& common, const PolicyFunctions& policy, +size_t PrepareInsertNonSoo(CommonFields& common, + const PolicyFunctions& __restrict policy, size_t hash, FindInfo target) { const bool rehash_for_bug_detection = common.should_rehash_for_bug_detection_on_insert() && From 668f174945fb1df1c662c617799450a344404f0c Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 7 May 2025 07:28:15 -0700 Subject: [PATCH 006/107] Fix new 
-Wnullability-completeness warnings found after upgrading the Clang version used in the Linux ARM CI to Clang 19. nullability-completeness is an all-or-nothing warning, and it is enabled by default. If anything is annotated, everything has to be annotated. There are a few types of fixes in this change. The most common is just to add missing annotations. In a few cases, some SFINAE methods are changed not to use pointers at all. In some cases, like cord_internal.h, I removed the single annotation as that is easier than doing the entire complicated file for now. cordz_test_helpers.h uses GoogleTest macros, which bring in pointers that can't easily be annotated, so I removed the annotations from this file as well. Tests are also opted-out of the warning. I'm not sure why the x86 Clang version didn't pick these issues up. PiperOrigin-RevId: 755844859 Change-Id: I7d7f4cc4f6d779f52c86a4d96e07880341244491 --- absl/copts/GENERATED_AbseilCopts.cmake | 3 +- absl/copts/GENERATED_copts.bzl | 3 +- absl/copts/copts.py | 1 + .../debugging/internal/decode_rust_punycode.h | 8 ++--- absl/log/internal/check_op.h | 31 +++++++++++-------- absl/status/internal/statusor_internal.h | 3 +- absl/status/statusor.h | 4 +-- absl/strings/cordz_test_helpers.h | 9 +++--- absl/strings/internal/cord_internal.h | 2 +- absl/strings/str_cat.h | 18 +++++------ absl/strings/substitute.h | 15 ++++----- absl/types/span.h | 18 ++++++----- 12 files changed, 64 insertions(+), 51 deletions(-) diff --git a/absl/copts/GENERATED_AbseilCopts.cmake b/absl/copts/GENERATED_AbseilCopts.cmake index cc0f4bb2d93..7d8af924ba7 100644 --- a/absl/copts/GENERATED_AbseilCopts.cmake +++ b/absl/copts/GENERATED_AbseilCopts.cmake @@ -23,6 +23,7 @@ list(APPEND ABSL_CLANG_CL_TEST_FLAGS "-Wno-implicit-int-conversion" "-Wno-missing-prototypes" "-Wno-missing-variable-declarations" + "-Wno-nullability-completeness" "-Wno-shadow" "-Wno-shorten-64-to-32" "-Wno-sign-compare" @@ -139,7 +140,6 @@ list(APPEND ABSL_LLVM_TEST_FLAGS 
"-Winvalid-constexpr" "-Wliteral-conversion" "-Wmissing-declarations" - "-Wnullability-completeness" "-Woverlength-strings" "-Wpointer-arith" "-Wself-assign" @@ -165,6 +165,7 @@ list(APPEND ABSL_LLVM_TEST_FLAGS "-Wno-implicit-int-conversion" "-Wno-missing-prototypes" "-Wno-missing-variable-declarations" + "-Wno-nullability-completeness" "-Wno-shadow" "-Wno-shorten-64-to-32" "-Wno-sign-compare" diff --git a/absl/copts/GENERATED_copts.bzl b/absl/copts/GENERATED_copts.bzl index 35319f08afe..23896e9dc25 100644 --- a/absl/copts/GENERATED_copts.bzl +++ b/absl/copts/GENERATED_copts.bzl @@ -24,6 +24,7 @@ ABSL_CLANG_CL_TEST_FLAGS = [ "-Wno-implicit-int-conversion", "-Wno-missing-prototypes", "-Wno-missing-variable-declarations", + "-Wno-nullability-completeness", "-Wno-shadow", "-Wno-shorten-64-to-32", "-Wno-sign-compare", @@ -140,7 +141,6 @@ ABSL_LLVM_TEST_FLAGS = [ "-Winvalid-constexpr", "-Wliteral-conversion", "-Wmissing-declarations", - "-Wnullability-completeness", "-Woverlength-strings", "-Wpointer-arith", "-Wself-assign", @@ -166,6 +166,7 @@ ABSL_LLVM_TEST_FLAGS = [ "-Wno-implicit-int-conversion", "-Wno-missing-prototypes", "-Wno-missing-variable-declarations", + "-Wno-nullability-completeness", "-Wno-shadow", "-Wno-shorten-64-to-32", "-Wno-sign-compare", diff --git a/absl/copts/copts.py b/absl/copts/copts.py index 941528e0ba4..8cf8f310724 100644 --- a/absl/copts/copts.py +++ b/absl/copts/copts.py @@ -93,6 +93,7 @@ "-Wno-implicit-int-conversion", "-Wno-missing-prototypes", "-Wno-missing-variable-declarations", + "-Wno-nullability-completeness", "-Wno-shadow", "-Wno-shorten-64-to-32", "-Wno-sign-compare", diff --git a/absl/debugging/internal/decode_rust_punycode.h b/absl/debugging/internal/decode_rust_punycode.h index b1b1c97feea..44aad8adb2b 100644 --- a/absl/debugging/internal/decode_rust_punycode.h +++ b/absl/debugging/internal/decode_rust_punycode.h @@ -23,10 +23,10 @@ ABSL_NAMESPACE_BEGIN namespace debugging_internal { struct DecodeRustPunycodeOptions { - const 
char* punycode_begin; - const char* punycode_end; - char* out_begin; - char* out_end; + const char* absl_nonnull punycode_begin; + const char* absl_nonnull punycode_end; + char* absl_nonnull out_begin; + char* absl_nonnull out_end; }; // Given Rust Punycode in `punycode_begin .. punycode_end`, writes the diff --git a/absl/log/internal/check_op.h b/absl/log/internal/check_op.h index dc7d19e9a13..725340282bd 100644 --- a/absl/log/internal/check_op.h +++ b/absl/log/internal/check_op.h @@ -224,7 +224,7 @@ inline void MakeCheckOpValueString(std::ostream& os, const T& v) { void MakeCheckOpValueString(std::ostream& os, char v); void MakeCheckOpValueString(std::ostream& os, signed char v); void MakeCheckOpValueString(std::ostream& os, unsigned char v); -void MakeCheckOpValueString(std::ostream& os, const void* p); +void MakeCheckOpValueString(std::ostream& os, const void* absl_nullable p); namespace detect_specialization { @@ -266,8 +266,9 @@ float operator<<(std::ostream&, float value); double operator<<(std::ostream&, double value); long double operator<<(std::ostream&, long double value); bool operator<<(std::ostream&, bool value); -const void* operator<<(std::ostream&, const void* value); -const void* operator<<(std::ostream&, std::nullptr_t); +const void* absl_nullable operator<<(std::ostream&, + const void* absl_nullable value); +const void* absl_nullable operator<<(std::ostream&, std::nullptr_t); // These `char` overloads are specified like this in the standard, so we have to // write them exactly the same to ensure the call is ambiguous. 
@@ -281,13 +282,14 @@ signed char operator<<(std::basic_ostream&, signed char); template unsigned char operator<<(std::basic_ostream&, unsigned char); template -const char* operator<<(std::basic_ostream&, const char*); +const char* absl_nonnull operator<<(std::basic_ostream&, + const char* absl_nonnull); template -const signed char* operator<<(std::basic_ostream&, - const signed char*); +const signed char* absl_nonnull operator<<(std::basic_ostream&, + const signed char* absl_nonnull); template -const unsigned char* operator<<(std::basic_ostream&, - const unsigned char*); +const unsigned char* absl_nonnull operator<<(std::basic_ostream&, + const unsigned char* absl_nonnull); // This overload triggers when the call is not ambiguous. // It means that T is being printed with some overload not on this list. @@ -312,7 +314,8 @@ class StringifySink { void Append(absl::string_view text); void Append(size_t length, char ch); - friend void AbslFormatFlush(StringifySink* sink, absl::string_view text); + friend void AbslFormatFlush(StringifySink* absl_nonnull sink, + absl::string_view text); private: std::ostream& os_; @@ -376,10 +379,12 @@ ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(char); ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(unsigned char); ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const std::string&); ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const absl::string_view&); -ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const char*); -ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const signed char*); -ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const unsigned char*); -ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const void*); +ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const char* absl_nonnull); +ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN( + const signed char* absl_nonnull); +ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN( + const unsigned char* absl_nonnull); 
+ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN(const void* absl_nonnull); #undef ABSL_LOG_INTERNAL_DEFINE_MAKE_CHECK_OP_STRING_EXTERN // `ABSL_LOG_INTERNAL_CHECK_OP_IMPL_RESULT` skips formatting the Check_OP result diff --git a/absl/status/internal/statusor_internal.h b/absl/status/internal/statusor_internal.h index ca7c5502c2e..e986611396e 100644 --- a/absl/status/internal/statusor_internal.h +++ b/absl/status/internal/statusor_internal.h @@ -39,7 +39,8 @@ template struct HasConversionOperatorToStatusOr : std::false_type {}; template -void test(char (*)[sizeof(std::declval().operator absl::StatusOr())]); +void test(char (*absl_nullable)[sizeof( + std::declval().operator absl::StatusOr())]); template struct HasConversionOperatorToStatusOr(0))> diff --git a/absl/status/statusor.h b/absl/status/statusor.h index 5257af0eb24..6142a2f8dd7 100644 --- a/absl/status/statusor.h +++ b/absl/status/statusor.h @@ -520,8 +520,8 @@ class StatusOr : private internal_statusor::StatusOrData, // REQUIRES: `this->ok() == true`, otherwise the behavior is undefined. // // Use `this->ok()` to verify that there is a current value. - const T* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND; - T* operator->() ABSL_ATTRIBUTE_LIFETIME_BOUND; + const T* absl_nonnull operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND; + T* absl_nonnull operator->() ABSL_ATTRIBUTE_LIFETIME_BOUND; // StatusOr::value_or() // diff --git a/absl/strings/cordz_test_helpers.h b/absl/strings/cordz_test_helpers.h index 98117099376..66232db7caa 100644 --- a/absl/strings/cordz_test_helpers.h +++ b/absl/strings/cordz_test_helpers.h @@ -34,16 +34,15 @@ namespace absl { ABSL_NAMESPACE_BEGIN // Returns the CordzInfo for the cord, or nullptr if the cord is not sampled. 
-inline const cord_internal::CordzInfo* absl_nullable GetCordzInfoForTesting( +inline const cord_internal::CordzInfo* GetCordzInfoForTesting( const Cord& cord) { if (!cord.contents_.is_tree()) return nullptr; return cord.contents_.cordz_info(); } // Returns true if the provided cordz_info is in the list of sampled cords. -inline bool CordzInfoIsListed( - const cord_internal::CordzInfo* absl_nonnull cordz_info, - cord_internal::CordzSampleToken token = {}) { +inline bool CordzInfoIsListed(const cord_internal::CordzInfo* cordz_info, + cord_internal::CordzSampleToken token = {}) { for (const cord_internal::CordzInfo& info : token) { if (cordz_info == &info) return true; } @@ -121,7 +120,7 @@ class CordzSamplingIntervalHelper { // Wrapper struct managing a small CordRep `rep` struct TestCordRep { - cord_internal::CordRepFlat* absl_nonnull rep; + cord_internal::CordRepFlat* rep; TestCordRep() { rep = cord_internal::CordRepFlat::New(100); diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index b55b412c091..cf1f703bfc6 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -635,7 +635,7 @@ class InlineData { poison(); } - void CopyInlineToString(std::string* absl_nonnull dst) const { + void CopyInlineToString(std::string* dst) const { assert(!is_tree()); // As Cord can store only 15 bytes it is smaller than std::string's // small string optimization buffer size. 
Therefore we will always trigger diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index eafd8a3f0e9..227b22af239 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -191,26 +191,26 @@ struct Hex { template explicit Hex( Int v, PadSpec spec = absl::kNoPad, - typename std::enable_if::value>::type* = nullptr) + std::enable_if_t::value, bool> = + true) : Hex(spec, static_cast(v)) {} template explicit Hex( Int v, PadSpec spec = absl::kNoPad, - typename std::enable_if::value>::type* = nullptr) + std::enable_if_t::value, bool> = + true) : Hex(spec, static_cast(v)) {} template explicit Hex( Int v, PadSpec spec = absl::kNoPad, - typename std::enable_if::value>::type* = nullptr) + std::enable_if_t::value, bool> = + true) : Hex(spec, static_cast(v)) {} template explicit Hex( Int v, PadSpec spec = absl::kNoPad, - typename std::enable_if::value>::type* = nullptr) + std::enable_if_t::value, bool> = + true) : Hex(spec, static_cast(v)) {} template explicit Hex(Pointee* absl_nullable v, PadSpec spec = absl::kNoPad) @@ -262,7 +262,7 @@ struct Dec { template explicit Dec(Int v, PadSpec spec = absl::kNoPad, - typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr) + std::enable_if_t = true) : value(v >= 0 ? static_cast(v) : uint64_t{0} - static_cast(v)), width(spec == absl::kNoPad ? 1 diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h index 08f64e99164..c93b1cc645f 100644 --- a/absl/strings/substitute.h +++ b/absl/strings/substitute.h @@ -187,12 +187,13 @@ class Arg { // vector::reference and const_reference require special help to convert // to `Arg` because it requires two user defined conversions. 
- template ::value && - (std::is_same::reference>::value || - std::is_same::const_reference>::value)>* = - nullptr> + template < + typename T, + std::enable_if_t< + std::is_class::value && + (std::is_same::reference>::value || + std::is_same::const_reference>::value), + bool> = true> Arg(T value) // NOLINT(google-explicit-constructor) : Arg(static_cast(value)) {} @@ -237,7 +238,7 @@ constexpr int CalculateOneBit(const char* absl_nonnull format) { : (1 << (*format - '0')); } -constexpr const char* SkipNumber(const char* absl_nonnull format) { +constexpr const char* absl_nonnull SkipNumber(const char* absl_nonnull format) { return !*format ? format : (format + 1); } diff --git a/absl/types/span.h b/absl/types/span.h index 444b2ae6944..ca9deaf7959 100644 --- a/absl/types/span.h +++ b/absl/types/span.h @@ -220,7 +220,7 @@ class ABSL_ATTRIBUTE_VIEW Span { static const size_type npos = ~(size_type(0)); constexpr Span() noexcept : Span(nullptr, 0) {} - constexpr Span(pointer array ABSL_ATTRIBUTE_LIFETIME_BOUND, + constexpr Span(pointer absl_nullable array ABSL_ATTRIBUTE_LIFETIME_BOUND, size_type length) noexcept : ptr_(array), len_(length) {} @@ -310,7 +310,7 @@ class ABSL_ATTRIBUTE_VIEW Span { // // Returns a pointer to the span's underlying array of data (which is held // outside the span). - constexpr pointer data() const noexcept { return ptr_; } + constexpr pointer absl_nullable data() const noexcept { return ptr_; } // Span::size() // @@ -368,27 +368,31 @@ class ABSL_ATTRIBUTE_VIEW Span { // // Returns an iterator pointing to the first element of this span, or `end()` // if the span is empty. - constexpr iterator begin() const noexcept { return data(); } + constexpr iterator absl_nullable begin() const noexcept { return data(); } // Span::cbegin() // // Returns a const iterator pointing to the first element of this span, or // `end()` if the span is empty. 
- constexpr const_iterator cbegin() const noexcept { return begin(); } + constexpr const_iterator absl_nullable cbegin() const noexcept { + return begin(); + } // Span::end() // // Returns an iterator pointing just beyond the last element at the // end of this span. This iterator acts as a placeholder; attempting to // access it results in undefined behavior. - constexpr iterator end() const noexcept { return data() + size(); } + constexpr iterator absl_nullable end() const noexcept { + return data() + size(); + } // Span::cend() // // Returns a const iterator pointing just beyond the last element at the // end of this span. This iterator acts as a placeholder; attempting to // access it results in undefined behavior. - constexpr const_iterator cend() const noexcept { return end(); } + constexpr const_iterator absl_nullable cend() const noexcept { return end(); } // Span::rbegin() // @@ -503,7 +507,7 @@ class ABSL_ATTRIBUTE_VIEW Span { } private: - pointer ptr_; + pointer absl_nullable ptr_; size_type len_; }; From cdd3d21fce01e78756161122d70cdf5cdfe18328 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 7 May 2025 08:26:41 -0700 Subject: [PATCH 007/107] Linux CI update GCC latest is now GCC-15.1 Clang on ARM is now Clang-19 Bazel 8.2.1 CMake 4.0.1 Enable Bazel "Vendor Mode" as a caching mechanism to reduce GitHub download failures. 
PiperOrigin-RevId: 755864516 Change-Id: I9bf38e20fb8a09739406798119c50ce1aa934f43 --- ci/cmake_common.sh | 2 +- ci/linux_arm_clang-latest_libcxx_bazel.sh | 16 ++++---- ci/linux_clang-latest_libcxx_asan_bazel.sh | 39 ++++++++++---------- ci/linux_clang-latest_libcxx_bazel.sh | 14 +++---- ci/linux_clang-latest_libcxx_tsan_bazel.sh | 43 +++++++++++----------- ci/linux_clang-latest_libstdcxx_bazel.sh | 37 ++++++++++--------- ci/linux_docker_containers.sh | 8 ++-- ci/linux_gcc-floor_libstdcxx_bazel.sh | 31 ++++++++-------- ci/linux_gcc-latest_libstdcxx_bazel.sh | 14 +++---- ci/windows_msvc_cmake.bat | 2 +- 10 files changed, 105 insertions(+), 101 deletions(-) diff --git a/ci/cmake_common.sh b/ci/cmake_common.sh index 3e14ca35181..484230cdf05 100644 --- a/ci/cmake_common.sh +++ b/ci/cmake_common.sh @@ -14,6 +14,6 @@ # The commit of GoogleTest to be used in the CMake tests in this directory. # Keep this in sync with the commit in the MODULE.bazel file. -readonly ABSL_GOOGLETEST_VERSION="1.16.0" +readonly ABSL_GOOGLETEST_VERSION="1.17.0" readonly ABSL_GOOGLETEST_DOWNLOAD_URL="https://github.com/google/googletest/releases/download/v${ABSL_GOOGLETEST_VERSION}/googletest-${ABSL_GOOGLETEST_VERSION}.tar.gz" diff --git a/ci/linux_arm_clang-latest_libcxx_bazel.sh b/ci/linux_arm_clang-latest_libcxx_bazel.sh index d9e5992a7d4..631a8bd706b 100755 --- a/ci/linux_arm_clang-latest_libcxx_bazel.sh +++ b/ci/linux_arm_clang-latest_libcxx_bazel.sh @@ -51,12 +51,12 @@ if [[ ${USE_BAZEL_CACHE:-0} -ne 0 ]]; then BAZEL_EXTRA_ARGS="--remote_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" fi -# Avoid depending on external sites like GitHub by checking --distdir for -# external dependencies first. 
-# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly ${DOCKER_EXTRA_ARGS:-}" - BAZEL_EXTRA_ARGS="--distdir=/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +# See https://bazel.build/external/vendor and the Dockerfile for +# an explaination of how this works. +if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly --env=BAZEL_VENDOR_ARCHIVE=/distdir/abseil-cpp_vendor.tar.gz ${DOCKER_EXTRA_ARGS:-}" + BAZEL_EXTRA_ARGS="--vendor_dir=/abseil-cpp_vendor ${BAZEL_EXTRA_ARGS:-}" fi for std in ${STD}; do @@ -71,13 +71,13 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ - /bin/sh -c " + /bin/bash --login -c " cp -r /abseil-cpp-ro/* /abseil-cpp/ if [ -n \"${ALTERNATE_OPTIONS:-}\" ]; then cp ${ALTERNATE_OPTIONS:-} absl/base/options.h || exit 1 fi /usr/local/bin/bazel test ... \ - --action_env=CC=clang-18 \ + --action_env=CC=clang-19 \ --compilation_mode=\"${compilation_mode}\" \ --copt=\"${exceptions_mode}\" \ --copt=\"-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1\" \ diff --git a/ci/linux_clang-latest_libcxx_asan_bazel.sh b/ci/linux_clang-latest_libcxx_asan_bazel.sh index c83f3a05a6c..cfc551080db 100755 --- a/ci/linux_clang-latest_libcxx_asan_bazel.sh +++ b/ci/linux_clang-latest_libcxx_asan_bazel.sh @@ -73,32 +73,33 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ + /bin/bash --login -c " /usr/local/bin/bazel test ... 
\ - --action_env="CC=/opt/llvm/clang/bin/clang" \ - --action_env="BAZEL_CXXOPTS=-std=${std}:-nostdinc++" \ - --action_env="BAZEL_LINKOPTS=-L/opt/llvm/libcxx/lib:-lc++:-lc++abi:-lm:-Wl,-rpath=/opt/llvm/libcxx/lib" \ - --action_env="CPLUS_INCLUDE_PATH=/opt/llvm/libcxx/include/c++/v1" \ - --compilation_mode="${compilation_mode}" \ - --copt="${exceptions_mode}" \ - --copt="-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1" \ - --copt="-fsanitize=address" \ - --copt="-fsanitize=${UBSAN_CHECKS}" \ - --copt="-fno-sanitize-recover=${UBSAN_CHECKS}" \ - --copt="-fno-sanitize-blacklist" \ + --action_env=\"CC=/opt/llvm/clang/bin/clang\" \ + --action_env=\"BAZEL_CXXOPTS=-std=${std}:-nostdinc++\" \ + --action_env=\"BAZEL_LINKOPTS=-L/opt/llvm/libcxx/lib:-lc++:-lc++abi:-lm:-Wl,-rpath=/opt/llvm/libcxx/lib\" \ + --action_env=\"CPLUS_INCLUDE_PATH=/opt/llvm/libcxx/include/c++/v1\" \ + --compilation_mode=\"${compilation_mode}\" \ + --copt=\"${exceptions_mode}\" \ + --copt=\"-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1\" \ + --copt=\"-fsanitize=address\" \ + --copt=\"-fsanitize=${UBSAN_CHECKS}\" \ + --copt=\"-fno-sanitize-recover=${UBSAN_CHECKS}\" \ + --copt=\"-fno-sanitize-blacklist\" \ --copt=-Werror \ --enable_bzlmod=true \ --features=external_include_paths \ --keep_going \ - --linkopt="-fsanitize=address" \ - --linkopt="-fsanitize-link-c++-runtime" \ + --linkopt=\"-fsanitize=address\" \ + --linkopt=\"-fsanitize-link-c++-runtime\" \ --show_timestamps \ - --test_env="ASAN_SYMBOLIZER_PATH=/opt/llvm/clang/bin/llvm-symbolizer" \ - --test_env="TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo" \ - --test_env="UBSAN_OPTIONS=print_stacktrace=1" \ - --test_env="UBSAN_SYMBOLIZER_PATH=/opt/llvm/clang/bin/llvm-symbolizer" \ + --test_env=\"ASAN_SYMBOLIZER_PATH=/opt/llvm/clang/bin/llvm-symbolizer\" \ + --test_env=\"TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo\" \ + --test_env=\"UBSAN_OPTIONS=print_stacktrace=1\" \ + --test_env=\"UBSAN_SYMBOLIZER_PATH=/opt/llvm/clang/bin/llvm-symbolizer\" \ 
--test_output=errors \ - --test_tag_filters="-benchmark,-noasan" \ - ${BAZEL_EXTRA_ARGS:-} + --test_tag_filters=\"-benchmark,-noasan\" \ + ${BAZEL_EXTRA_ARGS:-}" done done done diff --git a/ci/linux_clang-latest_libcxx_bazel.sh b/ci/linux_clang-latest_libcxx_bazel.sh index 832a9d8baf4..5c51d158345 100755 --- a/ci/linux_clang-latest_libcxx_bazel.sh +++ b/ci/linux_clang-latest_libcxx_bazel.sh @@ -51,12 +51,12 @@ if [[ ${USE_BAZEL_CACHE:-0} -ne 0 ]]; then BAZEL_EXTRA_ARGS="--remote_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" fi -# Avoid depending on external sites like GitHub by checking --distdir for -# external dependencies first. -# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly ${DOCKER_EXTRA_ARGS:-}" - BAZEL_EXTRA_ARGS="--distdir=/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +# See https://bazel.build/external/vendor and the Dockerfile for +# an explaination of how this works. 
+if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly --env=BAZEL_VENDOR_ARCHIVE=/distdir/abseil-cpp_vendor.tar.gz ${DOCKER_EXTRA_ARGS:-}" + BAZEL_EXTRA_ARGS="--vendor_dir=/abseil-cpp_vendor ${BAZEL_EXTRA_ARGS:-}" fi for std in ${STD}; do @@ -71,7 +71,7 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ - /bin/sh -c " + /bin/bash --login -c " cp -r /abseil-cpp-ro/* /abseil-cpp/ if [ -n \"${ALTERNATE_OPTIONS:-}\" ]; then cp ${ALTERNATE_OPTIONS:-} absl/base/options.h || exit 1 diff --git a/ci/linux_clang-latest_libcxx_tsan_bazel.sh b/ci/linux_clang-latest_libcxx_tsan_bazel.sh index 82b4dd16b15..c9ea22d8246 100755 --- a/ci/linux_clang-latest_libcxx_tsan_bazel.sh +++ b/ci/linux_clang-latest_libcxx_tsan_bazel.sh @@ -51,12 +51,12 @@ if [[ ${USE_BAZEL_CACHE:-0} -ne 0 ]]; then BAZEL_EXTRA_ARGS="--remote_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" fi -# Avoid depending on external sites like GitHub by checking --distdir for -# external dependencies first. -# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly ${DOCKER_EXTRA_ARGS:-}" - BAZEL_EXTRA_ARGS="--distdir=/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +# See https://bazel.build/external/vendor and the Dockerfile for +# an explanation of how this works. 
+if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly --env=BAZEL_VENDOR_ARCHIVE=/distdir/abseil-cpp_vendor.tar.gz ${DOCKER_EXTRA_ARGS:-}" + BAZEL_EXTRA_ARGS="--vendor_dir=/abseil-cpp_vendor ${BAZEL_EXTRA_ARGS:-}" fi for std in ${STD}; do @@ -70,28 +70,29 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ + /bin/bash --login -c " /usr/local/bin/bazel test ... \ - --action_env="CC=/opt/llvm/clang/bin/clang" \ - --action_env="BAZEL_CXXOPTS=-std=${std}:-nostdinc++" \ - --action_env="BAZEL_LINKOPTS=-L/opt/llvm/libcxx-tsan/lib:-lc++:-lc++abi:-lm:-Wl,-rpath=/opt/llvm/libcxx-tsan/lib" \ - --action_env="CPLUS_INCLUDE_PATH=/opt/llvm/libcxx-tsan/include/c++/v1" \ - --build_tag_filters="-notsan" \ - --compilation_mode="${compilation_mode}" \ - --copt="${exceptions_mode}" \ - --copt="-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1" \ - --copt="-fsanitize=thread" \ - --copt="-fno-sanitize-blacklist" \ + --action_env=\"CC=/opt/llvm/clang/bin/clang\" \ + --action_env=\"BAZEL_CXXOPTS=-std=${std}:-nostdinc++\" \ + --action_env=\"BAZEL_LINKOPTS=-L/opt/llvm/libcxx-tsan/lib:-lc++:-lc++abi:-lm:-Wl,-rpath=/opt/llvm/libcxx-tsan/lib\" \ + --action_env=\"CPLUS_INCLUDE_PATH=/opt/llvm/libcxx-tsan/include/c++/v1\" \ + --build_tag_filters=\"-notsan\" \ + --compilation_mode=\"${compilation_mode}\" \ + --copt=\"${exceptions_mode}\" \ + --copt=\"-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1\" \ + --copt=\"-fsanitize=thread\" \ + --copt=\"-fno-sanitize-blacklist\" \ --copt=-Werror \ --enable_bzlmod=true \ --features=external_include_paths \ --keep_going \ - --linkopt="-fsanitize=thread" \ + --linkopt=\"-fsanitize=thread\" \ --show_timestamps \ - --test_env="TSAN_SYMBOLIZER_PATH=/opt/llvm/clang/bin/llvm-symbolizer" \ - --test_env="TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo" \ + 
--test_env=\"TSAN_SYMBOLIZER_PATH=/opt/llvm/clang/bin/llvm-symbolizer\" \ + --test_env=\"TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo\" \ --test_output=errors \ - --test_tag_filters="-benchmark,-notsan" \ - ${BAZEL_EXTRA_ARGS:-} + --test_tag_filters=\"-benchmark,-notsan\" \ + ${BAZEL_EXTRA_ARGS:-}" done done done diff --git a/ci/linux_clang-latest_libstdcxx_bazel.sh b/ci/linux_clang-latest_libstdcxx_bazel.sh index 06aef6219ee..a1620e01698 100755 --- a/ci/linux_clang-latest_libstdcxx_bazel.sh +++ b/ci/linux_clang-latest_libstdcxx_bazel.sh @@ -51,12 +51,12 @@ if [[ ${USE_BAZEL_CACHE:-0} -ne 0 ]]; then BAZEL_EXTRA_ARGS="--remote_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" fi -# Avoid depending on external sites like GitHub by checking --distdir for -# external dependencies first. -# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly ${DOCKER_EXTRA_ARGS:-}" - BAZEL_EXTRA_ARGS="--distdir=/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +# See https://bazel.build/external/vendor and the Dockerfile for +# an explanation of how this works. +if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly --env=BAZEL_VENDOR_ARCHIVE=/distdir/abseil-cpp_vendor.tar.gz ${DOCKER_EXTRA_ARGS:-}" + BAZEL_EXTRA_ARGS="--vendor_dir=/abseil-cpp_vendor ${BAZEL_EXTRA_ARGS:-}" fi for std in ${STD}; do @@ -70,26 +70,27 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ + /bin/bash --login -c " /usr/local/bin/bazel test ... 
\ - --action_env="CC=/opt/llvm/clang/bin/clang" \ - --action_env="BAZEL_CXXOPTS=-std=${std}" \ - --compilation_mode="${compilation_mode}" \ - --copt="--gcc-toolchain=/usr/local" \ - --copt="-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1" \ - --copt="${exceptions_mode}" \ - --copt="-march=haswell" \ + --action_env=\"CC=/opt/llvm/clang/bin/clang\" \ + --action_env=\"BAZEL_CXXOPTS=-std=${std}\" \ + --compilation_mode=\"${compilation_mode}\" \ + --copt=\"--gcc-toolchain=/usr/local\" \ + --copt=\"-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1\" \ + --copt=\"${exceptions_mode}\" \ + --copt=\"-march=haswell\" \ --copt=-Werror \ - --define="absl=1" \ + --define=\"absl=1\" \ --enable_bzlmod=true \ --features=external_include_paths \ --keep_going \ - --linkopt="--gcc-toolchain=/usr/local" \ + --linkopt=\"--gcc-toolchain=/usr/local\" \ --show_timestamps \ - --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" \ - --test_env="TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo" \ + --test_env=\"GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1\" \ + --test_env=\"TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo\" \ --test_output=errors \ --test_tag_filters=-benchmark \ - ${BAZEL_EXTRA_ARGS:-} + ${BAZEL_EXTRA_ARGS:-}" done done done diff --git a/ci/linux_docker_containers.sh b/ci/linux_docker_containers.sh index 3f824a8e744..0f454716681 100644 --- a/ci/linux_docker_containers.sh +++ b/ci/linux_docker_containers.sh @@ -16,7 +16,7 @@ # Test scripts should source this file to get the identifiers. 
readonly LINUX_ALPINE_CONTAINER="gcr.io/google.com/absl-177019/alpine:20230612" -readonly LINUX_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20241218" -readonly LINUX_ARM_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_arm_hybrid-latest:20250224" -readonly LINUX_GCC_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20241218" -readonly LINUX_GCC_FLOOR_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-floor:20250205" +readonly LINUX_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20250430" +readonly LINUX_ARM_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_arm_hybrid-latest:20250430" +readonly LINUX_GCC_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20250430" +readonly LINUX_GCC_FLOOR_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-floor:20250430" diff --git a/ci/linux_gcc-floor_libstdcxx_bazel.sh b/ci/linux_gcc-floor_libstdcxx_bazel.sh index 74d996ab53c..b683b60c2fd 100755 --- a/ci/linux_gcc-floor_libstdcxx_bazel.sh +++ b/ci/linux_gcc-floor_libstdcxx_bazel.sh @@ -51,12 +51,12 @@ if [[ ${USE_BAZEL_CACHE:-0} -ne 0 ]]; then BAZEL_EXTRA_ARGS="--remote_http_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" fi -# Avoid depending on external sites like GitHub by checking --distdir for -# external dependencies first. -# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - DOCKER_EXTRA_ARGS="--volume=${KOKORO_GFILE_DIR}/distdir:/distdir:ro ${DOCKER_EXTRA_ARGS:-}" - BAZEL_EXTRA_ARGS="--distdir=/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +# See https://bazel.build/external/vendor and the Dockerfile for +# an explanation of how this works. 
+if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly --env=BAZEL_VENDOR_ARCHIVE=/distdir/abseil-cpp_vendor.tar.gz ${DOCKER_EXTRA_ARGS:-}" + BAZEL_EXTRA_ARGS="--vendor_dir=/abseil-cpp_vendor ${BAZEL_EXTRA_ARGS:-}" fi for std in ${STD}; do @@ -70,22 +70,23 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ + /bin/bash --login -c " /usr/local/bin/bazel test ... \ - --action_env="CC=/usr/local/bin/gcc" \ - --action_env="BAZEL_CXXOPTS=-std=${std}" \ - --compilation_mode="${compilation_mode}" \ - --copt="${exceptions_mode}" \ - --copt="-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1" \ + --action_env=\"CC=/usr/local/bin/gcc\" \ + --action_env=\"BAZEL_CXXOPTS=-std=${std}\" \ + --compilation_mode=\"${compilation_mode}\" \ + --copt=\"${exceptions_mode}\" \ + --copt=\"-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1\" \ --copt=-Werror \ - --define="absl=1" \ + --define=\"absl=1\" \ --features=external_include_paths \ --keep_going \ --show_timestamps \ - --test_env="GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1" \ - --test_env="TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo" \ + --test_env=\"GTEST_INSTALL_FAILURE_SIGNAL_HANDLER=1\" \ + --test_env=\"TZDIR=/abseil-cpp/absl/time/internal/cctz/testdata/zoneinfo\" \ --test_output=errors \ --test_tag_filters=-benchmark \ - ${BAZEL_EXTRA_ARGS:-} + ${BAZEL_EXTRA_ARGS:-}" done done done diff --git a/ci/linux_gcc-latest_libstdcxx_bazel.sh b/ci/linux_gcc-latest_libstdcxx_bazel.sh index 2daa13263ae..b092c1d6d4c 100755 --- a/ci/linux_gcc-latest_libstdcxx_bazel.sh +++ b/ci/linux_gcc-latest_libstdcxx_bazel.sh @@ -51,12 +51,12 @@ if [[ ${USE_BAZEL_CACHE:-0} -ne 0 ]]; then BAZEL_EXTRA_ARGS="--remote_cache=https://storage.googleapis.com/absl-bazel-remote-cache/${container_key} --google_credentials=/keystore/73103_absl-bazel-remote-cache ${BAZEL_EXTRA_ARGS:-}" fi -# Avoid depending 
on external sites like GitHub by checking --distdir for -# external dependencies first. -# https://docs.bazel.build/versions/master/guide.html#distdir -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -d "${KOKORO_GFILE_DIR}/distdir" ]]; then - DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly ${DOCKER_EXTRA_ARGS:-}" - BAZEL_EXTRA_ARGS="--distdir=/distdir ${BAZEL_EXTRA_ARGS:-}" +# Use Bazel Vendor mode to reduce reliance on external dependencies. +# See https://bazel.build/external/vendor and the Dockerfile for +# an explanation of how this works. +if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f "${KOKORO_GFILE_DIR}/distdir/abseil-cpp_vendor.tar.gz" ]]; then + DOCKER_EXTRA_ARGS="--mount type=bind,source=${KOKORO_GFILE_DIR}/distdir,target=/distdir,readonly --env=BAZEL_VENDOR_ARCHIVE=/distdir/abseil-cpp_vendor.tar.gz ${DOCKER_EXTRA_ARGS:-}" + BAZEL_EXTRA_ARGS="--vendor_dir=/abseil-cpp_vendor ${BAZEL_EXTRA_ARGS:-}" fi for std in ${STD}; do @@ -71,7 +71,7 @@ for std in ${STD}; do --rm \ ${DOCKER_EXTRA_ARGS:-} \ ${DOCKER_CONTAINER} \ - /bin/sh -c " + /bin/bash --login -c " cp -r /abseil-cpp-ro/* /abseil-cpp/ if [ -n \"${ALTERNATE_OPTIONS:-}\" ]; then cp ${ALTERNATE_OPTIONS:-} absl/base/options.h || exit 1 diff --git a/ci/windows_msvc_cmake.bat b/ci/windows_msvc_cmake.bat index c2d9e429f9a..62cdb70c408 100755 --- a/ci/windows_msvc_cmake.bat +++ b/ci/windows_msvc_cmake.bat @@ -16,7 +16,7 @@ SETLOCAL ENABLEDELAYEDEXPANSION :: The version of GoogleTest to be used in the CMake tests in this directory. :: Keep this in sync with the version in the WORKSPACE file. -SET ABSL_GOOGLETEST_VERSION=1.16.0 +SET ABSL_GOOGLETEST_VERSION=1.17.0 SET ABSL_GOOGLETEST_DOWNLOAD_URL=https://github.com/google/googletest/releases/download/v%ABSL_GOOGLETEST_VERSION%/googletest-%ABSL_GOOGLETEST_VERSION%.tar.gz :: Replace '\' with '/' in Windows paths for CMake. 
From 9fcfa0602d43f707b54d1a0edcd7acc210786cf2 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 7 May 2025 12:03:45 -0700 Subject: [PATCH 008/107] Change some nullability annotations in absl::Span to absl_nullability_unknown to workaround a bug that makes nullability checks trigger in foreach loops, while still fixing the -Wnullability-completeness warnings. PiperOrigin-RevId: 755951074 Change-Id: Ia6eea53f381d9255856a3f85efa41f0dfbd5c684 --- absl/types/span.h | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/absl/types/span.h b/absl/types/span.h index ca9deaf7959..39e6a8a5d75 100644 --- a/absl/types/span.h +++ b/absl/types/span.h @@ -202,10 +202,11 @@ class ABSL_ATTRIBUTE_VIEW Span { public: using element_type = T; using value_type = absl::remove_cv_t; - // TODO(b/316099902) - pointer should be Nullable, but this makes it hard - // to recognize foreach loops as safe. - using pointer = T*; - using const_pointer = const T*; + // TODO(b/316099902) - pointer should be absl_nullable, but this makes it hard + // to recognize foreach loops as safe. absl_nullability_unknown is currently + // used to suppress -Wnullability-completeness warnings. + using pointer = T* absl_nullability_unknown; + using const_pointer = const T* absl_nullability_unknown; using reference = T&; using const_reference = const T&; using iterator = pointer; @@ -220,7 +221,7 @@ class ABSL_ATTRIBUTE_VIEW Span { static const size_type npos = ~(size_type(0)); constexpr Span() noexcept : Span(nullptr, 0) {} - constexpr Span(pointer absl_nullable array ABSL_ATTRIBUTE_LIFETIME_BOUND, + constexpr Span(pointer array ABSL_ATTRIBUTE_LIFETIME_BOUND, size_type length) noexcept : ptr_(array), len_(length) {} @@ -310,7 +311,7 @@ class ABSL_ATTRIBUTE_VIEW Span { // // Returns a pointer to the span's underlying array of data (which is held // outside the span). 
- constexpr pointer absl_nullable data() const noexcept { return ptr_; } + constexpr pointer data() const noexcept { return ptr_; } // Span::size() // @@ -368,31 +369,27 @@ class ABSL_ATTRIBUTE_VIEW Span { // // Returns an iterator pointing to the first element of this span, or `end()` // if the span is empty. - constexpr iterator absl_nullable begin() const noexcept { return data(); } + constexpr iterator begin() const noexcept { return data(); } // Span::cbegin() // // Returns a const iterator pointing to the first element of this span, or // `end()` if the span is empty. - constexpr const_iterator absl_nullable cbegin() const noexcept { - return begin(); - } + constexpr const_iterator cbegin() const noexcept { return begin(); } // Span::end() // // Returns an iterator pointing just beyond the last element at the // end of this span. This iterator acts as a placeholder; attempting to // access it results in undefined behavior. - constexpr iterator absl_nullable end() const noexcept { - return data() + size(); - } + constexpr iterator end() const noexcept { return data() + size(); } // Span::cend() // // Returns a const iterator pointing just beyond the last element at the // end of this span. This iterator acts as a placeholder; attempting to // access it results in undefined behavior. - constexpr const_iterator absl_nullable cend() const noexcept { return end(); } + constexpr const_iterator cend() const noexcept { return end(); } // Span::rbegin() // @@ -507,7 +504,7 @@ class ABSL_ATTRIBUTE_VIEW Span { } private: - pointer absl_nullable ptr_; + pointer ptr_; size_type len_; }; From 464b5b366c9bdd6ffe52e984851bbe9e8afccb85 Mon Sep 17 00:00:00 2001 From: Omer Mor Date: Wed, 7 May 2025 13:51:33 -0700 Subject: [PATCH 009/107] Deprecate `ABSL_HAVE_STD_STRING_VIEW`. This macro is no longer necessary now that Abseil requires C++17. 
PiperOrigin-RevId: 755992345 Change-Id: Id1361d62d860a0ba4bdfca22e8f39d54812ef82c --- absl/base/config.h | 16 +++--- .../internal/hash_function_defaults.h | 9 +--- .../internal/hash_function_defaults_test.cc | 53 +------------------ absl/hash/hash_test.cc | 17 +----- absl/hash/internal/hash.h | 9 +--- absl/meta/type_traits.h | 7 +-- absl/meta/type_traits_test.cc | 7 +-- absl/strings/internal/str_format/arg.cc | 7 +-- absl/strings/internal/str_format/arg.h | 12 +---- .../internal/str_format/convert_test.cc | 7 +-- absl/strings/str_cat.h | 4 +- absl/strings/str_cat_test.cc | 7 +-- absl/strings/string_view_test.cc | 2 +- 13 files changed, 19 insertions(+), 138 deletions(-) diff --git a/absl/base/config.h b/absl/base/config.h index 7514b86e5e7..f3cafbdfa53 100644 --- a/absl/base/config.h +++ b/absl/base/config.h @@ -530,13 +530,12 @@ static_assert(ABSL_INTERNAL_INLINE_NAMESPACE_STR[0] != 'h' || // ABSL_HAVE_STD_STRING_VIEW // -// Checks whether C++17 std::string_view is available. +// Deprecated: always defined to 1. +// std::string_view was added in C++17, which means all versions of C++ +// supported by Abseil have it. #ifdef ABSL_HAVE_STD_STRING_VIEW #error "ABSL_HAVE_STD_STRING_VIEW cannot be directly set." -#elif defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L -#define ABSL_HAVE_STD_STRING_VIEW 1 -#elif defined(ABSL_INTERNAL_CPLUSPLUS_LANG) && \ - ABSL_INTERNAL_CPLUSPLUS_LANG >= 201703L +#else #define ABSL_HAVE_STD_STRING_VIEW 1 #endif @@ -561,13 +560,10 @@ static_assert(ABSL_INTERNAL_INLINE_NAMESPACE_STR[0] != 'h' || // Indicates whether absl::string_view is an alias for std::string_view. #if !defined(ABSL_OPTION_USE_STD_STRING_VIEW) #error options.h is misconfigured. 
-#elif ABSL_OPTION_USE_STD_STRING_VIEW == 0 || \ - (ABSL_OPTION_USE_STD_STRING_VIEW == 2 && \ - !defined(ABSL_HAVE_STD_STRING_VIEW)) +#elif ABSL_OPTION_USE_STD_STRING_VIEW == 0 #undef ABSL_USES_STD_STRING_VIEW #elif ABSL_OPTION_USE_STD_STRING_VIEW == 1 || \ - (ABSL_OPTION_USE_STD_STRING_VIEW == 2 && \ - defined(ABSL_HAVE_STD_STRING_VIEW)) + ABSL_OPTION_USE_STD_STRING_VIEW == 2 #define ABSL_USES_STD_STRING_VIEW 1 #else #error options.h is misconfigured. diff --git a/absl/container/internal/hash_function_defaults.h b/absl/container/internal/hash_function_defaults.h index 0f07bcfe294..c2a757b53f6 100644 --- a/absl/container/internal/hash_function_defaults.h +++ b/absl/container/internal/hash_function_defaults.h @@ -49,6 +49,7 @@ #include #include #include +#include #include #include "absl/base/config.h" @@ -58,10 +59,6 @@ #include "absl/strings/cord.h" #include "absl/strings/string_view.h" -#ifdef ABSL_HAVE_STD_STRING_VIEW -#include -#endif - namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { @@ -113,8 +110,6 @@ struct HashEq : StringHashEq {}; template <> struct HashEq : StringHashEq {}; -#ifdef ABSL_HAVE_STD_STRING_VIEW - template struct BasicStringHash { using is_transparent = void; @@ -153,8 +148,6 @@ struct HashEq : BasicStringHashEq {}; template <> struct HashEq : BasicStringHashEq {}; -#endif // ABSL_HAVE_STD_STRING_VIEW - // Supports heterogeneous lookup for pointers and smart pointers. 
template struct HashEq { diff --git a/absl/container/internal/hash_function_defaults_test.cc b/absl/container/internal/hash_function_defaults_test.cc index 912d1190a6b..9a39b0719cd 100644 --- a/absl/container/internal/hash_function_defaults_test.cc +++ b/absl/container/internal/hash_function_defaults_test.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -28,10 +29,6 @@ #include "absl/strings/cord_test_helpers.h" #include "absl/strings/string_view.h" -#ifdef ABSL_HAVE_STD_STRING_VIEW -#include -#endif - namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { @@ -118,9 +115,6 @@ TYPED_TEST(HashString, Works) { } TEST(BasicStringViewTest, WStringEqWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_eq eq; EXPECT_TRUE(eq(L"a", L"a")); EXPECT_TRUE(eq(L"a", std::wstring_view(L"a"))); @@ -128,13 +122,9 @@ TEST(BasicStringViewTest, WStringEqWorks) { EXPECT_FALSE(eq(L"a", L"b")); EXPECT_FALSE(eq(L"a", std::wstring_view(L"b"))); EXPECT_FALSE(eq(L"a", std::wstring(L"b"))); -#endif } TEST(BasicStringViewTest, WStringViewEqWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_eq eq; EXPECT_TRUE(eq(L"a", L"a")); EXPECT_TRUE(eq(L"a", std::wstring_view(L"a"))); @@ -142,13 +132,9 @@ TEST(BasicStringViewTest, WStringViewEqWorks) { EXPECT_FALSE(eq(L"a", L"b")); EXPECT_FALSE(eq(L"a", std::wstring_view(L"b"))); EXPECT_FALSE(eq(L"a", std::wstring(L"b"))); -#endif } TEST(BasicStringViewTest, U16StringEqWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_eq eq; EXPECT_TRUE(eq(u"a", u"a")); EXPECT_TRUE(eq(u"a", std::u16string_view(u"a"))); @@ -156,13 +142,9 @@ TEST(BasicStringViewTest, U16StringEqWorks) { EXPECT_FALSE(eq(u"a", u"b")); EXPECT_FALSE(eq(u"a", std::u16string_view(u"b"))); EXPECT_FALSE(eq(u"a", std::u16string(u"b"))); -#endif } TEST(BasicStringViewTest, U16StringViewEqWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_eq eq; 
EXPECT_TRUE(eq(u"a", u"a")); EXPECT_TRUE(eq(u"a", std::u16string_view(u"a"))); @@ -170,13 +152,9 @@ TEST(BasicStringViewTest, U16StringViewEqWorks) { EXPECT_FALSE(eq(u"a", u"b")); EXPECT_FALSE(eq(u"a", std::u16string_view(u"b"))); EXPECT_FALSE(eq(u"a", std::u16string(u"b"))); -#endif } TEST(BasicStringViewTest, U32StringEqWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_eq eq; EXPECT_TRUE(eq(U"a", U"a")); EXPECT_TRUE(eq(U"a", std::u32string_view(U"a"))); @@ -184,13 +162,9 @@ TEST(BasicStringViewTest, U32StringEqWorks) { EXPECT_FALSE(eq(U"a", U"b")); EXPECT_FALSE(eq(U"a", std::u32string_view(U"b"))); EXPECT_FALSE(eq(U"a", std::u32string(U"b"))); -#endif } TEST(BasicStringViewTest, U32StringViewEqWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_eq eq; EXPECT_TRUE(eq(U"a", U"a")); EXPECT_TRUE(eq(U"a", std::u32string_view(U"a"))); @@ -198,85 +172,60 @@ TEST(BasicStringViewTest, U32StringViewEqWorks) { EXPECT_FALSE(eq(U"a", U"b")); EXPECT_FALSE(eq(U"a", std::u32string_view(U"b"))); EXPECT_FALSE(eq(U"a", std::u32string(U"b"))); -#endif } TEST(BasicStringViewTest, WStringHashWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_hash hash; auto h = hash(L"a"); EXPECT_EQ(h, hash(std::wstring_view(L"a"))); EXPECT_EQ(h, hash(std::wstring(L"a"))); EXPECT_NE(h, hash(std::wstring_view(L"b"))); EXPECT_NE(h, hash(std::wstring(L"b"))); -#endif } TEST(BasicStringViewTest, WStringViewHashWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_hash hash; auto h = hash(L"a"); EXPECT_EQ(h, hash(std::wstring_view(L"a"))); EXPECT_EQ(h, hash(std::wstring(L"a"))); EXPECT_NE(h, hash(std::wstring_view(L"b"))); EXPECT_NE(h, hash(std::wstring(L"b"))); -#endif } TEST(BasicStringViewTest, U16StringHashWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_hash hash; auto h = hash(u"a"); EXPECT_EQ(h, hash(std::u16string_view(u"a"))); EXPECT_EQ(h, 
hash(std::u16string(u"a"))); EXPECT_NE(h, hash(std::u16string_view(u"b"))); EXPECT_NE(h, hash(std::u16string(u"b"))); -#endif } TEST(BasicStringViewTest, U16StringViewHashWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_hash hash; auto h = hash(u"a"); EXPECT_EQ(h, hash(std::u16string_view(u"a"))); EXPECT_EQ(h, hash(std::u16string(u"a"))); EXPECT_NE(h, hash(std::u16string_view(u"b"))); EXPECT_NE(h, hash(std::u16string(u"b"))); -#endif } TEST(BasicStringViewTest, U32StringHashWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_hash hash; auto h = hash(U"a"); EXPECT_EQ(h, hash(std::u32string_view(U"a"))); EXPECT_EQ(h, hash(std::u32string(U"a"))); EXPECT_NE(h, hash(std::u32string_view(U"b"))); EXPECT_NE(h, hash(std::u32string(U"b"))); -#endif } TEST(BasicStringViewTest, U32StringViewHashWorks) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else hash_default_hash hash; auto h = hash(U"a"); EXPECT_EQ(h, hash(std::u32string_view(U"a"))); EXPECT_EQ(h, hash(std::u32string(U"a"))); EXPECT_NE(h, hash(std::u32string_view(U"b"))); EXPECT_NE(h, hash(std::u32string(U"b"))); -#endif } struct NoDeleter { diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index c3182f1ff6a..7582f54431f 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -55,10 +56,6 @@ #include // NOLINT #endif -#ifdef ABSL_HAVE_STD_STRING_VIEW -#include -#endif - namespace { using ::absl::hash_test_internal::is_hashable; @@ -495,22 +492,15 @@ TEST(HashValueTest, U32String) { } TEST(HashValueTest, WStringView) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else EXPECT_TRUE((is_hashable::value)); EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(std::make_tuple( std::wstring_view(), std::wstring_view(L"ABC"), std::wstring_view(L"ABC"), std::wstring_view(L"Some other different string_view"), 
std::wstring_view(L"Iñtërnâtiônàlizætiøn")))); -#endif } TEST(HashValueTest, U16StringView) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else EXPECT_TRUE((is_hashable::value)); EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly( @@ -518,13 +508,9 @@ TEST(HashValueTest, U16StringView) { std::u16string_view(u"ABC"), std::u16string_view(u"Some other different string_view"), std::u16string_view(u"Iñtërnâtiônàlizætiøn")))); -#endif } TEST(HashValueTest, U32StringView) { -#ifndef ABSL_HAVE_STD_STRING_VIEW - GTEST_SKIP(); -#else EXPECT_TRUE((is_hashable::value)); EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly( @@ -532,7 +518,6 @@ TEST(HashValueTest, U32StringView) { std::u32string_view(U"ABC"), std::u32string_view(U"Some other different string_view"), std::u32string_view(U"Iñtërnâtiônàlizætiøn")))); -#endif } TEST(HashValueTest, StdFilesystemPath) { diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index c7916b50936..63b35490b21 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -92,10 +93,6 @@ #include // NOLINT #endif -#ifdef ABSL_HAVE_STD_STRING_VIEW -#include -#endif - namespace absl { ABSL_NAMESPACE_BEGIN @@ -640,8 +637,6 @@ H AbslHashValue( WeaklyMixedInteger{str.size()}); } -#ifdef ABSL_HAVE_STD_STRING_VIEW - // Support std::wstring_view, std::u16string_view and std::u32string_view. 
template ::value || @@ -653,8 +648,6 @@ H AbslHashValue(H hash_state, std::basic_string_view str) { WeaklyMixedInteger{str.size()}); } -#endif // ABSL_HAVE_STD_STRING_VIEW - #if defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \ (!defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) || \ __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ >= 130000) && \ diff --git a/absl/meta/type_traits.h b/absl/meta/type_traits.h index 5e57a154f57..ba57e52f810 100644 --- a/absl/meta/type_traits.h +++ b/absl/meta/type_traits.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -48,10 +49,6 @@ #include // NOLINT(build/c++20) #endif -#ifdef ABSL_HAVE_STD_STRING_VIEW -#include -#endif - // Defines the default alignment. `__STDCPP_DEFAULT_NEW_ALIGNMENT__` is a C++17 // feature. #if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) @@ -507,10 +504,8 @@ template struct IsView : std::integral_constant::value || IsViewImpl::value> {}; -#ifdef ABSL_HAVE_STD_STRING_VIEW template struct IsView> : std::true_type {}; -#endif #ifdef __cpp_lib_span template diff --git a/absl/meta/type_traits_test.cc b/absl/meta/type_traits_test.cc index bcf90d73877..7c2dbbcfeb0 100644 --- a/absl/meta/type_traits_test.cc +++ b/absl/meta/type_traits_test.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -26,10 +27,6 @@ #include "absl/time/clock.h" #include "absl/time/time.h" -#ifdef ABSL_HAVE_STD_STRING_VIEW -#include -#endif - namespace { using ::testing::StaticAssertTypeEq; @@ -45,12 +42,10 @@ static_assert(IsOwnerAndNotView::value, "string is an owner, not a view"); static_assert(IsOwnerAndNotView::value, "wstring is an owner, not a view"); -#ifdef ABSL_HAVE_STD_STRING_VIEW static_assert(!IsOwnerAndNotView::value, "string_view is a view, not an owner"); static_assert(!IsOwnerAndNotView::value, "wstring_view is a view, not an owner"); -#endif template struct simple_pair { diff --git a/absl/strings/internal/str_format/arg.cc 
b/absl/strings/internal/str_format/arg.cc index 103c85d16ea..01e4e42d9d0 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "absl/base/config.h" @@ -38,10 +39,6 @@ #include "absl/strings/numbers.h" #include "absl/strings/string_view.h" -#if defined(ABSL_HAVE_STD_STRING_VIEW) -#include -#endif - namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { @@ -459,13 +456,11 @@ StringConvertResult FormatConvertImpl(string_view v, return {ConvertStringArg(v, conv, sink)}; } -#if defined(ABSL_HAVE_STD_STRING_VIEW) StringConvertResult FormatConvertImpl(std::wstring_view v, const FormatConversionSpecImpl conv, FormatSinkImpl* sink) { return {ConvertStringArg(v.data(), v.size(), conv, sink)}; } -#endif StringPtrConvertResult FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index 309161d5915..021013fb1ab 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,6 @@ #include "absl/strings/internal/str_format/extension.h" #include "absl/strings/string_view.h" -#if defined(ABSL_HAVE_STD_STRING_VIEW) -#include -#endif - namespace absl { ABSL_NAMESPACE_BEGIN @@ -228,7 +225,6 @@ StringConvertResult FormatConvertImpl(const std::wstring& v, StringConvertResult FormatConvertImpl(string_view v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -#if defined(ABSL_HAVE_STD_STRING_VIEW) StringConvertResult FormatConvertImpl(std::wstring_view v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); @@ -239,7 +235,6 @@ inline StringConvertResult FormatConvertImpl(std::string_view v, return FormatConvertImpl(absl::string_view(v.data(), v.size()), conv, sink); } #endif // !ABSL_USES_STD_STRING_VIEW -#endif // 
ABSL_HAVE_STD_STRING_VIEW using StringPtrConvertResult = ArgConvertResult #include #include +#include #include // NOLINT #include #include @@ -46,10 +47,6 @@ #include "absl/types/optional.h" #include "absl/types/span.h" -#if defined(ABSL_HAVE_STD_STRING_VIEW) -#include -#endif - namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { @@ -322,10 +319,8 @@ TEST_F(FormatConvertTest, BasicString) { TestStringConvert(std::string("hello")); TestStringConvert(std::wstring(L"hello")); TestStringConvert(string_view("hello")); -#if defined(ABSL_HAVE_STD_STRING_VIEW) TestStringConvert(std::string_view("hello")); TestStringConvert(std::wstring_view(L"hello")); -#endif // ABSL_HAVE_STD_STRING_VIEW } TEST_F(FormatConvertTest, NullString) { diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index 227b22af239..84db0f6cd50 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -111,7 +111,7 @@ #include "absl/strings/numbers.h" #include "absl/strings/string_view.h" -#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +#if !defined(ABSL_USES_STD_STRING_VIEW) #include #endif @@ -366,7 +366,7 @@ class AlphaNum { ABSL_ATTRIBUTE_LIFETIME_BOUND) : piece_(pc) {} -#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +#if !defined(ABSL_USES_STD_STRING_VIEW) AlphaNum(std::string_view pc // NOLINT(runtime/explicit) ABSL_ATTRIBUTE_LIFETIME_BOUND) : piece_(pc.data(), pc.size()) {} diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index 4de379eb531..a3bd42ccd97 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "gtest/gtest.h" @@ -28,10 +29,6 @@ #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" -#if defined(ABSL_HAVE_STD_STRING_VIEW) -#include -#endif - #ifdef __ANDROID__ // Android assert messages only go to system log, so death tests cannot inspect // the message 
for matching. @@ -219,13 +216,11 @@ TEST(StrCat, CornerCases) { EXPECT_EQ(result, ""); } -#if defined(ABSL_HAVE_STD_STRING_VIEW) TEST(StrCat, StdStringView) { std::string_view pieces[] = {"Hello", ", ", "World", "!"}; EXPECT_EQ(absl::StrCat(pieces[0], pieces[1], pieces[2], pieces[3]), "Hello, World!"); } -#endif // ABSL_HAVE_STD_STRING_VIEW TEST(StrCat, NullConstCharPtr) { const char* null = nullptr; diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 7064cc7183d..0a2a7a97b4b 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -34,7 +34,7 @@ #include "absl/base/config.h" #include "absl/meta/type_traits.h" -#if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) +#if defined(ABSL_USES_STD_STRING_VIEW) || defined(__ANDROID__) // We don't control the death messaging when using std::string_view. // Android assert messages only go to system log, so death tests cannot inspect // the message for matching. From bba13cb1ac190cb4b77f6e55b6875b84be34df01 Mon Sep 17 00:00:00 2001 From: Omer Mor Date: Wed, 7 May 2025 16:31:54 -0700 Subject: [PATCH 010/107] Add support for logging wide strings in `absl::log`. The logged strings are truncated to fit the available buffer in the same way as regular strings. Invalid characters are replaced by the replacement char (`U+FFFD`). Wide string literals are recorded as literals in `logging.proto.Value`. 
PiperOrigin-RevId: 756052375 Change-Id: If3960b4230f923061028bd738eb209b5a82a31eb --- absl/log/CMakeLists.txt | 2 + absl/log/internal/BUILD.bazel | 2 + absl/log/internal/append_truncated.h | 28 ++++ absl/log/internal/log_message.cc | 80 ++++++++-- absl/log/internal/log_message.h | 49 +++++- absl/log/log_format_test.cc | 227 ++++++++++++++++++++++++++- 6 files changed, 366 insertions(+), 22 deletions(-) diff --git a/absl/log/CMakeLists.txt b/absl/log/CMakeLists.txt index 6aae05d1c30..130897f49f6 100644 --- a/absl/log/CMakeLists.txt +++ b/absl/log/CMakeLists.txt @@ -218,6 +218,7 @@ absl_cc_library( absl::span absl::strerror absl::strings + absl::strings_internal absl::time ) @@ -395,6 +396,7 @@ absl_cc_library( DEPS absl::config absl::strings + absl::strings_internal absl::span ) diff --git a/absl/log/internal/BUILD.bazel b/absl/log/internal/BUILD.bazel index 44ec71bb437..953b690e98e 100644 --- a/absl/log/internal/BUILD.bazel +++ b/absl/log/internal/BUILD.bazel @@ -205,6 +205,7 @@ cc_library( "//absl/log:log_sink_registry", "//absl/memory", "//absl/strings", + "//absl/strings:internal", "//absl/time", "//absl/types:span", ], @@ -218,6 +219,7 @@ cc_library( deps = [ "//absl/base:config", "//absl/strings", + "//absl/strings:internal", "//absl/types:span", ], ) diff --git a/absl/log/internal/append_truncated.h b/absl/log/internal/append_truncated.h index f0e7912c2ac..d420a8b5c6a 100644 --- a/absl/log/internal/append_truncated.h +++ b/absl/log/internal/append_truncated.h @@ -17,8 +17,10 @@ #include #include +#include #include "absl/base/config.h" +#include "absl/strings/internal/utf8.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" @@ -33,6 +35,32 @@ inline size_t AppendTruncated(absl::string_view src, absl::Span &dst) { dst.remove_prefix(src.size()); return src.size(); } +// Likewise, but it also takes a wide character string and transforms it into a +// UTF-8 encoded byte string regardless of the current locale. 
+// - On platforms where `wchar_t` is 2 bytes (e.g., Windows), the input is +// treated as UTF-16. +// - On platforms where `wchar_t` is 4 bytes (e.g., Linux, macOS), the input +// is treated as UTF-32. +inline size_t AppendTruncated(std::wstring_view src, absl::Span &dst) { + absl::strings_internal::ShiftState state; + size_t total_bytes_written = 0; + for (const wchar_t wc : src) { + // If the destination buffer might not be large enough to write the next + // character, stop. + if (dst.size() < absl::strings_internal::kMaxEncodedUTF8Size) break; + size_t bytes_written = + absl::strings_internal::WideToUtf8(wc, dst.data(), state); + if (bytes_written == static_cast(-1)) { + // Invalid character. Encode REPLACEMENT CHARACTER (U+FFFD) instead. + constexpr wchar_t kReplacementCharacter = L'\uFFFD'; + bytes_written = absl::strings_internal::WideToUtf8(kReplacementCharacter, + dst.data(), state); + } + dst.remove_prefix(bytes_written); + total_bytes_written += bytes_written; + } + return total_bytes_written; +} // Likewise, but `n` copies of `c`. 
inline size_t AppendTruncated(char c, size_t n, absl::Span &dst) { if (n > dst.size()) n = dst.size(); diff --git a/absl/log/internal/log_message.cc b/absl/log/internal/log_message.cc index aaaaf0357b1..07d17a02c1d 100644 --- a/absl/log/internal/log_message.cc +++ b/absl/log/internal/log_message.cc @@ -27,10 +27,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include "absl/base/attributes.h" @@ -47,12 +49,14 @@ #include "absl/log/internal/globals.h" #include "absl/log/internal/log_format.h" #include "absl/log/internal/log_sink_set.h" +#include "absl/log/internal/nullguard.h" #include "absl/log/internal/proto.h" #include "absl/log/internal/structured_proto.h" #include "absl/log/log_entry.h" #include "absl/log/log_sink.h" #include "absl/log/log_sink_registry.h" #include "absl/memory/memory.h" +#include "absl/strings/internal/utf8.h" #include "absl/strings/string_view.h" #include "absl/time/clock.h" #include "absl/time/time.h" @@ -403,6 +407,35 @@ LogMessage& LogMessage::operator<<(absl::string_view v) { CopyToEncodedBuffer(v); return *this; } + +LogMessage& LogMessage::operator<<(const std::wstring& v) { + CopyToEncodedBuffer(v); + return *this; +} + +LogMessage& LogMessage::operator<<(std::wstring_view v) { + CopyToEncodedBuffer(v); + return *this; +} + +template <> +LogMessage& LogMessage::operator<< ( + const wchar_t* absl_nullable const& v) { + if (v == nullptr) { + CopyToEncodedBuffer( + absl::string_view(kCharNull.data(), kCharNull.size() - 1)); + } else { + CopyToEncodedBuffer( + std::wstring_view(v, wcsnlen(v, data_->encoded_remaining().size()))); + } + return *this; +} + +LogMessage& LogMessage::operator<<(wchar_t v) { + CopyToEncodedBuffer(std::wstring_view(&v, 1)); + return *this; +} + LogMessage& LogMessage::operator<<(std::ostream& (*m)(std::ostream& os)) { OstreamView view(*data_); data_->manipulated << m; @@ -625,6 +658,37 @@ template void LogMessage::CopyToEncodedBuffer( template void 
LogMessage::CopyToEncodedBuffer< LogMessage::StringType::kNotLiteral>(char ch, size_t num); +template +void LogMessage::CopyToEncodedBuffer(std::wstring_view str) { + auto encoded_remaining_copy = data_->encoded_remaining(); + constexpr uint8_t tag_value = str_type == StringType::kLiteral + ? ValueTag::kStringLiteral + : ValueTag::kString; + size_t max_str_byte_length = + absl::strings_internal::kMaxEncodedUTF8Size * str.length(); + auto value_start = + EncodeMessageStart(EventTag::kValue, + BufferSizeFor(tag_value, WireType::kLengthDelimited) + + max_str_byte_length, + &encoded_remaining_copy); + auto str_start = EncodeMessageStart(tag_value, max_str_byte_length, + &encoded_remaining_copy); + if (str_start.data()) { + log_internal::AppendTruncated(str, encoded_remaining_copy); + EncodeMessageLength(str_start, &encoded_remaining_copy); + EncodeMessageLength(value_start, &encoded_remaining_copy); + data_->encoded_remaining() = encoded_remaining_copy; + } else { + // The field header(s) did not fit; zero `encoded_remaining()` so we don't + // write anything else later. 
+ data_->encoded_remaining().remove_suffix(data_->encoded_remaining().size()); + } +} +template void LogMessage::CopyToEncodedBuffer( + std::wstring_view str); +template void LogMessage::CopyToEncodedBuffer< + LogMessage::StringType::kNotLiteral>(std::wstring_view str); + template void LogMessage::CopyToEncodedBufferWithStructuredProtoField< LogMessage::StringType::kLiteral>(StructuredProtoField field, absl::string_view str); @@ -682,17 +746,13 @@ LogMessageFatal::LogMessageFatal(const char* absl_nonnull file, int line, *this << "Check failed: " << failure_msg << " "; } -LogMessageFatal::~LogMessageFatal() { - FailWithoutStackTrace(); -} +LogMessageFatal::~LogMessageFatal() { FailWithoutStackTrace(); } LogMessageDebugFatal::LogMessageDebugFatal(const char* absl_nonnull file, int line) : LogMessage(file, line, absl::LogSeverity::kFatal) {} -LogMessageDebugFatal::~LogMessageDebugFatal() { - FailWithoutStackTrace(); -} +LogMessageDebugFatal::~LogMessageDebugFatal() { FailWithoutStackTrace(); } LogMessageQuietlyDebugFatal::LogMessageQuietlyDebugFatal( const char* absl_nonnull file, int line) @@ -700,9 +760,7 @@ LogMessageQuietlyDebugFatal::LogMessageQuietlyDebugFatal( SetFailQuietly(); } -LogMessageQuietlyDebugFatal::~LogMessageQuietlyDebugFatal() { - FailQuietly(); -} +LogMessageQuietlyDebugFatal::~LogMessageQuietlyDebugFatal() { FailQuietly(); } LogMessageQuietlyFatal::LogMessageQuietlyFatal(const char* absl_nonnull file, int line) @@ -717,9 +775,7 @@ LogMessageQuietlyFatal::LogMessageQuietlyFatal( *this << "Check failed: " << failure_msg << " "; } -LogMessageQuietlyFatal::~LogMessageQuietlyFatal() { - FailQuietly(); -} +LogMessageQuietlyFatal::~LogMessageQuietlyFatal() { FailQuietly(); } #if defined(_MSC_VER) && !defined(__clang__) #pragma warning(pop) #endif diff --git a/absl/log/internal/log_message.h b/absl/log/internal/log_message.h index e7eff47b406..1aaf05e31f4 100644 --- a/absl/log/internal/log_message.h +++ b/absl/log/internal/log_message.h @@ -27,12 +27,15 
@@ #ifndef ABSL_LOG_INTERNAL_LOG_MESSAGE_H_ #define ABSL_LOG_INTERNAL_LOG_MESSAGE_H_ +#include + #include #include #include #include #include #include +#include #include #include "absl/base/attributes.h" @@ -158,6 +161,13 @@ class LogMessage { LogMessage& operator<<(const std::string& v); LogMessage& operator<<(absl::string_view v); + // Wide string overloads (since std::ostream does not provide them). + LogMessage& operator<<(const std::wstring& v); + LogMessage& operator<<(std::wstring_view v); + // `const wchar_t*` is handled by `operator<< `. + LogMessage& operator<<(wchar_t* absl_nullable v); + LogMessage& operator<<(wchar_t v); + // Handle stream manipulators e.g. std::endl. LogMessage& operator<<(std::ostream& (*absl_nonnull m)(std::ostream& os)); LogMessage& operator<<(std::ios_base& (*absl_nonnull m)(std::ios_base& os)); @@ -169,17 +179,20 @@ class LogMessage { // this template for every value of `SIZE` encountered in each source code // file. That significantly increases linker input sizes. Inlining is cheap // because the argument to this overload is almost always a string literal so - // the call to `strlen` can be replaced at compile time. The overload for - // `char[]` below should not be inlined. The compiler typically does not have - // the string at compile time and cannot replace the call to `strlen` so - // inlining it increases the binary size. See the discussion on + // the call to `strlen` can be replaced at compile time. The overloads for + // `char[]`/`wchar_t[]` below should not be inlined. The compiler typically + // does not have the string at compile time and cannot replace the call to + // `strlen` so inlining it increases the binary size. See the discussion on // cl/107527369. template LogMessage& operator<<(const char (&buf)[SIZE]); + template + LogMessage& operator<<(const wchar_t (&buf)[SIZE]); // This prevents non-const `char[]` arrays from looking like literals. 
template LogMessage& operator<<(char (&buf)[SIZE]) ABSL_ATTRIBUTE_NOINLINE; + // `wchar_t[SIZE]` is handled by `operator<< `. // Types that support `AbslStringify()` are serialized that way. // Types that don't support `AbslStringify()` but do support streaming into a @@ -243,6 +256,8 @@ class LogMessage { void CopyToEncodedBuffer(absl::string_view str) ABSL_ATTRIBUTE_NOINLINE; template void CopyToEncodedBuffer(char ch, size_t num) ABSL_ATTRIBUTE_NOINLINE; + template + void CopyToEncodedBuffer(std::wstring_view str) ABSL_ATTRIBUTE_NOINLINE; // Copies `field` to the encoded buffer, then appends `str` after it // (truncating `str` if necessary to fit). @@ -273,6 +288,22 @@ class LogMessage { absl_nonnull std::unique_ptr data_; }; +// Explicitly specializes the generic operator<< for `const wchar_t*` +// arguments. +// +// This method is used instead of a non-template `const wchar_t*` overload, +// as the latter was found to take precedence over the array template +// (`operator<<(const wchar_t(&)[SIZE])`) when handling string literals. +// This specialization ensures the array template now correctly processes +// literals. +template <> +LogMessage& LogMessage::operator<< ( + const wchar_t* absl_nullable const& v); + +inline LogMessage& LogMessage::operator<<(wchar_t* absl_nullable v) { + return operator<<(const_cast(v)); +} + // Helper class so that `AbslStringify()` can modify the LogMessage. 
class StringifySink final { public: @@ -317,6 +348,12 @@ LogMessage& LogMessage::operator<<(const char (&buf)[SIZE]) { return *this; } +template +LogMessage& LogMessage::operator<<(const wchar_t (&buf)[SIZE]) { + CopyToEncodedBuffer(buf); + return *this; +} + // Note: the following is declared `ABSL_ATTRIBUTE_NOINLINE` template LogMessage& LogMessage::operator<<(char (&buf)[SIZE]) { @@ -358,6 +395,10 @@ LogMessage::CopyToEncodedBuffer(char ch, size_t num); extern template void LogMessage::CopyToEncodedBuffer< LogMessage::StringType::kNotLiteral>(char ch, size_t num); +extern template void LogMessage::CopyToEncodedBuffer< + LogMessage::StringType::kLiteral>(std::wstring_view str); +extern template void LogMessage::CopyToEncodedBuffer< + LogMessage::StringType::kNotLiteral>(std::wstring_view str); // `LogMessageFatal` ensures the process will exit in failure after logging this // message. diff --git a/absl/log/log_format_test.cc b/absl/log/log_format_test.cc index ecd69683009..f4e33c98a45 100644 --- a/absl/log/log_format_test.cc +++ b/absl/log/log_format_test.cc @@ -15,12 +15,14 @@ #include +#include #include #include #include #include #include #include +#include #include #ifdef __ANDROID__ @@ -28,6 +30,7 @@ #endif #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/config.h" #include "absl/log/check.h" #include "absl/log/internal/test_matchers.h" #include "absl/log/log.h" @@ -44,6 +47,7 @@ using ::absl::log_internal::MatchesOstream; using ::absl::log_internal::RawEncodedMessage; using ::absl::log_internal::TextMessage; using ::absl::log_internal::TextPrefix; +using ::testing::_; using ::testing::AllOf; using ::testing::AnyOf; using ::testing::Each; @@ -124,6 +128,33 @@ TYPED_TEST(CharLogFormatTest, Unprintable) { LOG(INFO) << value; } +TEST(WideCharLogFormatTest, Printable) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq("€")), + ENCODED_MESSAGE(HasValues( + 
ElementsAre(ValueWithStr(Eq("€")))))))); + + test_sink.StartCapturingLogs(); + const wchar_t value = L'\u20AC'; + LOG(INFO) << value; +} + +TEST(WideCharLogFormatTest, Unprintable) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + // Using NEL (Next Line) Unicode character (U+0085). + // It is encoded as "\xC2\x85" in UTF-8. + constexpr wchar_t wide_value = L'\u0085'; + constexpr char value[] = "\xC2\x85"; + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << wide_value; +} + template class UnsignedIntLogFormatTest : public testing::Test {}; using UnsignedIntTypes = Types class VoidPtrLogFormatTest : public testing::Test {}; -using VoidPtrTypes = Types; +using VoidPtrTypes = Types; TYPED_TEST_SUITE(VoidPtrLogFormatTest, VoidPtrTypes); TYPED_TEST(VoidPtrLogFormatTest, Null) { @@ -676,11 +707,10 @@ TYPED_TEST(VoidPtrLogFormatTest, NonNull) { template class VolatilePtrLogFormatTest : public testing::Test {}; -using VolatilePtrTypes = - Types; +using VolatilePtrTypes = Types< + volatile void*, const volatile void*, volatile char*, const volatile char*, + volatile signed char*, const volatile signed char*, volatile unsigned char*, + const volatile unsigned char*, volatile wchar_t*, const volatile wchar_t*>; TYPED_TEST_SUITE(VolatilePtrLogFormatTest, VolatilePtrTypes); TYPED_TEST(VolatilePtrLogFormatTest, Null) { @@ -784,6 +814,38 @@ TYPED_TEST(CharPtrLogFormatTest, NonNull) { LOG(INFO) << value; } +template +class WideCharPtrLogFormatTest : public testing::Test {}; +using WideCharPtrTypes = Types; +TYPED_TEST_SUITE(WideCharPtrLogFormatTest, WideCharPtrTypes); + +TYPED_TEST(WideCharPtrLogFormatTest, Null) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam* const value = nullptr; + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq("(null)")), + 
ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq("(null)")))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideCharPtrLogFormatTest, NonNull) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam data[] = {'v', 'a', 'l', 'u', 'e', '\0'}; + TypeParam* const value = data; + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq("value")), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq("value")))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + TEST(BoolLogFormatTest, True) { absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); @@ -836,6 +898,17 @@ TEST(LogFormatTest, StringLiteral) { LOG(INFO) << "value"; } +TEST(LogFormatTest, WideStringLiteral) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq("value")), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithLiteral(Eq("value")))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << L"value"; +} + TEST(LogFormatTest, CharArray) { absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); @@ -854,6 +927,125 @@ TEST(LogFormatTest, CharArray) { LOG(INFO) << value; } +TEST(LogFormatTest, WideCharArray) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + wchar_t value[] = L"value"; + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq("value")), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq("value")))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +// Comprehensive test string for validating wchar_t to UTF-8 conversion. +// See details in absl/strings/internal/utf8_test.cc. 
+// +// clang-format off +#define ABSL_LOG_INTERNAL_WIDE_LITERAL L"Holá €1 你好 שָׁלוֹם 👍🏻🇺🇸👩‍❤️‍💋‍👨 中" +#define ABSL_LOG_INTERNAL_UTF8_LITERAL u8"Holá €1 你好 שָׁלוֹם 👍🏻🇺🇸👩‍❤️‍💋‍👨 中" +// clang-format on + +absl::string_view GetUtf8TestString() { + // `u8""` forces UTF-8 encoding; MSVC will default to e.g. CP1252 (and warn) + // without it. However, the resulting character type differs between pre-C++20 + // (`char`) and C++20 (`char8_t`). So we reinterpret_cast to `char*` and wrap + // it in a `string_view`. + static const absl::string_view kUtf8TestString( + reinterpret_cast(ABSL_LOG_INTERNAL_UTF8_LITERAL), + sizeof(ABSL_LOG_INTERNAL_UTF8_LITERAL) - 1); + return kUtf8TestString; +} + +template +class WideStringLogFormatTest : public testing::Test {}; +using StringTypes = + Types; +TYPED_TEST_SUITE(WideStringLogFormatTest, StringTypes); + +TYPED_TEST(WideStringLogFormatTest, NonLiterals) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = ABSL_LOG_INTERNAL_WIDE_LITERAL; + absl::string_view utf8_value = GetUtf8TestString(); + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TEST(WideStringLogFormatTest, StringView) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + std::wstring_view value = ABSL_LOG_INTERNAL_WIDE_LITERAL; + absl::string_view utf8_value = GetUtf8TestString(); + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TEST(WideStringLogFormatTest, Literal) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + absl::string_view utf8_value = GetUtf8TestString(); + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + 
ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithLiteral(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << ABSL_LOG_INTERNAL_WIDE_LITERAL; +} + +#undef ABSL_LOG_INTERNAL_WIDE_LITERAL +#undef ABSL_LOG_INTERNAL_UTF8_LITERAL + +TYPED_TEST(WideStringLogFormatTest, InvalidCharactersAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xDC00 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA � BBB"; + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, EmptyWideString) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L""; + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq("")), + ENCODED_MESSAGE(HasValues( + ElementsAre(ValueWithStr(Eq("")))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TEST(WideStringLogFormatTest, MixedNarrowAndWideStrings) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + EXPECT_CALL(test_sink, Log(_, _, "1234")); + + test_sink.StartCapturingLogs(); + LOG(INFO) << "1" << L"2" << "3" << L"4"; +} + class CustomClass {}; std::ostream& operator<<(std::ostream& os, const CustomClass&) { return os << "CustomClass{}"; @@ -1675,6 +1867,29 @@ TEST(StructuredLoggingOverflowTest, TruncatesStrings) { LOG(INFO) << std::string(2 * absl::log_internal::kLogMessageBufferSize, 'x'); } +TEST(StructuredLoggingOverflowTest, TruncatesWideStrings) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + // This message is too long and should be truncated to some unspecified size + // no greater than the buffer size but not too much less either. It should be + // truncated rather than discarded. 
+ EXPECT_CALL( + test_sink, + Send(AllOf( + TextMessage(AllOf( + SizeIs(AllOf(Ge(absl::log_internal::kLogMessageBufferSize - 256), + Le(absl::log_internal::kLogMessageBufferSize))), + Each(Eq('x')))), + ENCODED_MESSAGE(HasOneStrThat(AllOf( + SizeIs(AllOf(Ge(absl::log_internal::kLogMessageBufferSize - 256), + Le(absl::log_internal::kLogMessageBufferSize))), + Each(Eq('x')))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << std::wstring(2 * absl::log_internal::kLogMessageBufferSize, + L'x'); +} + struct StringLike { absl::string_view data; }; From 1b52dcb350289b262a105471a75ef6c001beecae Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 7 May 2025 16:37:30 -0700 Subject: [PATCH 011/107] Stop being strict about validating the "clone" part of mangled names Support for parsing mangled symbol names with a ".something" suffix in the demangler was originally added because the compiler was using that for function cloning, but since then the compiler has started using ".something" suffixes in many other cases, such as function renaming for ThinLTO and CFI. As a result, some symbols in binaries built with these features would fail to demangle. So instead of trying to validate what comes after the "." let's just stop parsing when we see a ".", so that the symbols are demangled correctly. This behavior is consistent with libc++. PiperOrigin-RevId: 756054147 Change-Id: I01e9e97eee32bee05ba243b615ed26a36b59591d --- absl/debugging/internal/demangle.cc | 32 +----------------------- absl/debugging/internal/demangle_test.cc | 17 +++++++------ 2 files changed, 10 insertions(+), 39 deletions(-) diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc index dc15b8e5849..5f62ebb8978 100644 --- a/absl/debugging/internal/demangle.cc +++ b/absl/debugging/internal/demangle.cc @@ -484,36 +484,6 @@ static bool IsAlpha(char c) { static bool IsDigit(char c) { return c >= '0' && c <= '9'; } -// Returns true if "str" is a function clone suffix. 
These suffixes are used -// by GCC 4.5.x and later versions (and our locally-modified version of GCC -// 4.4.x) to indicate functions which have been cloned during optimization. -// We treat any sequence (.+.+)+ as a function clone suffix. -// Additionally, '_' is allowed along with the alphanumeric sequence. -static bool IsFunctionCloneSuffix(const char *str) { - size_t i = 0; - while (str[i] != '\0') { - bool parsed = false; - // Consume a single [. | _]*[.]* sequence. - if (str[i] == '.' && (IsAlpha(str[i + 1]) || str[i + 1] == '_')) { - parsed = true; - i += 2; - while (IsAlpha(str[i]) || str[i] == '_') { - ++i; - } - } - if (str[i] == '.' && IsDigit(str[i + 1])) { - parsed = true; - i += 2; - while (IsDigit(str[i])) { - ++i; - } - } - if (!parsed) - return false; - } - return true; // Consumed everything in "str". -} - static bool EndsWith(State *state, const char chr) { return state->parse_state.out_cur_idx > 0 && state->parse_state.out_cur_idx < state->out_end_idx && @@ -2932,7 +2902,7 @@ static bool ParseTopLevelMangledName(State *state) { if (ParseMangledName(state)) { if (RemainingInput(state)[0] != '\0') { // Drop trailing function clone suffix, if any. - if (IsFunctionCloneSuffix(RemainingInput(state))) { + if (RemainingInput(state)[0] == '.') { return true; } // Append trailing version suffix if any. diff --git a/absl/debugging/internal/demangle_test.cc b/absl/debugging/internal/demangle_test.cc index 9c8225a7599..2012184b21c 100644 --- a/absl/debugging/internal/demangle_test.cc +++ b/absl/debugging/internal/demangle_test.cc @@ -556,14 +556,15 @@ TEST(Demangle, Clones) { EXPECT_TRUE(Demangle("_ZL3Foov.part.9.165493.constprop.775.31805", tmp, sizeof(tmp))); EXPECT_STREQ("Foo()", tmp); - // Invalid (. without anything else), should not demangle. - EXPECT_FALSE(Demangle("_ZL3Foov.", tmp, sizeof(tmp))); - // Invalid (. with mix of alpha and digits), should not demangle. - EXPECT_FALSE(Demangle("_ZL3Foov.abc123", tmp, sizeof(tmp))); - // Invalid (.clone. 
not followed by number), should not demangle. - EXPECT_FALSE(Demangle("_ZL3Foov.clone.", tmp, sizeof(tmp))); - // Invalid (.constprop. not followed by number), should not demangle. - EXPECT_FALSE(Demangle("_ZL3Foov.isra.2.constprop.", tmp, sizeof(tmp))); + // Other suffixes should demangle too. + EXPECT_TRUE(Demangle("_ZL3Foov.", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.abc123", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.clone.", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.isra.2.constprop.", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); } TEST(Demangle, Discriminators) { From db8171fd8b686f89d31578d74156e7825c5251ab Mon Sep 17 00:00:00 2001 From: Omer Mor Date: Thu, 8 May 2025 10:54:47 -0700 Subject: [PATCH 012/107] Remove dependency on `wcsnlen` for string length calculation. The `wcsnlen` function is not consistently available across all target environments. Its prior use was intended as a micro-optimization and safeguard to limit string scanning to the remaining buffer size and prevent potential issues with malformed strings lacking a null terminator. This change now relies on the implicit `std::wstring_view` constructor to determine the string length. `CopyToEncodedBuffer` will continue to handle truncation effectively, mitigating the original concerns. 
PiperOrigin-RevId: 756378282 Change-Id: I858bad01724507f7926f868aa300eabad8a4358c --- absl/log/internal/log_message.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/absl/log/internal/log_message.cc b/absl/log/internal/log_message.cc index 07d17a02c1d..3aed3a2fdfd 100644 --- a/absl/log/internal/log_message.cc +++ b/absl/log/internal/log_message.cc @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -425,8 +424,7 @@ LogMessage& LogMessage::operator<< ( CopyToEncodedBuffer( absl::string_view(kCharNull.data(), kCharNull.size() - 1)); } else { - CopyToEncodedBuffer( - std::wstring_view(v, wcsnlen(v, data_->encoded_remaining().size()))); + CopyToEncodedBuffer(v); } return *this; } From a4950fba03217d0ea102c98ce795c189bba50383 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Thu, 8 May 2025 12:11:26 -0700 Subject: [PATCH 013/107] Use __builtin_is_cpp_trivially_relocatable to implement absl::is_trivially_relocatable in a way that is compatible with PR2786 in the upcoming C++26. https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2786r11.html This change is being made now because Chromium is reporting that a recent LLVM commit adds deprecation warnings for __is_trivially_relocatable. 
https://github.com/llvm/llvm-project/pull/138835 PiperOrigin-RevId: 756408712 Change-Id: Iacf966ed2ebfd436d52d180f0dab34465b3c7176 --- absl/meta/type_traits.h | 27 ++++++++++++++------- absl/meta/type_traits_test.cc | 45 ----------------------------------- 2 files changed, 18 insertions(+), 54 deletions(-) diff --git a/absl/meta/type_traits.h b/absl/meta/type_traits.h index ba57e52f810..02c1e6309ca 100644 --- a/absl/meta/type_traits.h +++ b/absl/meta/type_traits.h @@ -324,11 +324,17 @@ using swap_internal::Swap; // absl::is_trivially_relocatable // +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2024/p2786r11.html +// // Detects whether a type is known to be "trivially relocatable" -- meaning it // can be relocated from one place to another as if by memcpy/memmove. // This implies that its object representation doesn't depend on its address, // and also none of its special member functions do anything strange. // +// Note that when relocating the caller code should ensure that if the object is +// polymorphic, the dynamic type is of the most derived type. Padding bytes +// should not be copied. +// // This trait is conservative. If it's true then the type is definitely // trivially relocatable, but if it's false then the type may or may not be. For // example, std::vector is trivially relocatable on every known STL @@ -346,11 +352,7 @@ using swap_internal::Swap; // // Upstream documentation: // -// https://clang.llvm.org/docs/LanguageExtensions.html#:~:text=__is_trivially_relocatable - -// If the compiler offers a builtin that tells us the answer, we can use that. -// This covers all of the cases in the fallback below, plus types that opt in -// using e.g. [[clang::trivial_abi]]. +// https://clang.llvm.org/docs/LanguageExtensions.html#:~:text=__builtin_is_cpp_trivially_relocatable // // Clang on Windows has the builtin, but it falsely claims types with a // user-provided destructor are trivial (http://b/275003464). 
So we opt out @@ -375,15 +377,22 @@ using swap_internal::Swap; // // According to https://github.com/abseil/abseil-cpp/issues/1479, this does not // work with NVCC either. -#if ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && \ - (defined(__cpp_impl_trivially_relocatable) || \ - (!defined(__clang__) && !defined(__APPLE__) && !defined(__NVCC__))) +#if ABSL_HAVE_BUILTIN(__builtin_is_cpp_trivially_relocatable) +// https://github.com/llvm/llvm-project/pull/127636#pullrequestreview-2637005293 +// In the current implementation, __builtin_is_cpp_trivially_relocatable will +// only return true for types that are trivially relocatable according to the +// standard. Notably, this means that marking a type [[clang::trivial_abi]] aka +// ABSL_HAVE_ATTRIBUTE_TRIVIAL_ABI will have no effect on this trait. template struct is_trivially_relocatable - : std::integral_constant {}; + : std::integral_constant { +}; #elif ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && defined(__clang__) && \ !(defined(_WIN32) || defined(_WIN64)) && !defined(__APPLE__) && \ !defined(__NVCC__) +// https://github.com/llvm/llvm-project/pull/139061 +// __is_trivially_relocatable is deprecated. +// TODO(b/325479096): Remove this case. template struct is_trivially_relocatable : std::integral_constant< diff --git a/absl/meta/type_traits_test.cc b/absl/meta/type_traits_test.cc index 7c2dbbcfeb0..3d55a00ea11 100644 --- a/absl/meta/type_traits_test.cc +++ b/absl/meta/type_traits_test.cc @@ -333,51 +333,6 @@ TEST(TriviallyRelocatable, UserProvidedDestructor) { static_assert(!absl::is_trivially_relocatable::value, ""); } -// TODO(b/275003464): remove the opt-out for Clang on Windows once -// __is_trivially_relocatable is used there again. -// TODO(b/324278148): remove the opt-out for Apple once -// __is_trivially_relocatable is fixed there. -// TODO(b/325479096): remove the opt-out for Clang once -// __is_trivially_relocatable is fixed there. 
-#if defined(ABSL_HAVE_ATTRIBUTE_TRIVIAL_ABI) && \ - ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && \ - (defined(__cpp_impl_trivially_relocatable) || \ - (!defined(__clang__) && !defined(__APPLE__) && !defined(__NVCC__))) -// A type marked with the "trivial ABI" attribute is trivially relocatable even -// if it has user-provided special members. -TEST(TriviallyRelocatable, TrivialAbi) { - struct ABSL_ATTRIBUTE_TRIVIAL_ABI S { - S(S&&) {} // NOLINT(modernize-use-equals-default) - S(const S&) {} // NOLINT(modernize-use-equals-default) - S& operator=(S&&) { return *this; } - S& operator=(const S&) { return *this; } - ~S() {} // NOLINT(modernize-use-equals-default) - }; - - static_assert(absl::is_trivially_relocatable::value, ""); -} -#endif - -// TODO(b/275003464): remove the opt-out for Clang on Windows once -// __is_trivially_relocatable is used there again. -// TODO(b/324278148): remove the opt-out for Apple once -// __is_trivially_relocatable is fixed there. -#if defined(ABSL_HAVE_ATTRIBUTE_TRIVIAL_ABI) && \ - ABSL_HAVE_BUILTIN(__is_trivially_relocatable) && defined(__clang__) && \ - !(defined(_WIN32) || defined(_WIN64)) && !defined(__APPLE__) && \ - !defined(__NVCC__) -// A type marked with the "trivial ABI" attribute is trivially relocatable even -// if it has a user-provided copy constructor and a user-provided destructor. -TEST(TriviallyRelocatable, TrivialAbi_NoUserProvidedMove) { - struct ABSL_ATTRIBUTE_TRIVIAL_ABI S { - S(const S&) {} // NOLINT(modernize-use-equals-default) - ~S() {} // NOLINT(modernize-use-equals-default) - }; - - static_assert(absl::is_trivially_relocatable::value, ""); -} -#endif - #ifdef ABSL_HAVE_CONSTANT_EVALUATED constexpr int64_t NegateIfConstantEvaluated(int64_t i) { From 0710718acaa0274e5ce125918ca82289b8ec7a90 Mon Sep 17 00:00:00 2001 From: Omer Mor Date: Thu, 8 May 2025 13:13:32 -0700 Subject: [PATCH 014/107] Added test cases for invalid surrogates sequences. 
While these are disabled now because the current implementation does not correctly handle them, this is a starting point for a future fix. PiperOrigin-RevId: 756431459 Change-Id: I9c498ce1a2e12baa32a46bcdc56e9354feee305a --- absl/log/log_format_test.cc | 70 ++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/absl/log/log_format_test.cc b/absl/log/log_format_test.cc index f4e33c98a45..6b7d1e555c2 100644 --- a/absl/log/log_format_test.cc +++ b/absl/log/log_format_test.cc @@ -1009,7 +1009,7 @@ TEST(WideStringLogFormatTest, Literal) { #undef ABSL_LOG_INTERNAL_WIDE_LITERAL #undef ABSL_LOG_INTERNAL_UTF8_LITERAL -TYPED_TEST(WideStringLogFormatTest, InvalidCharactersAreReplaced) { +TYPED_TEST(WideStringLogFormatTest, IsolatedLowSurrogatesAreReplaced) { absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); TypeParam value = L"AAA \xDC00 BBB"; @@ -1024,6 +1024,74 @@ TYPED_TEST(WideStringLogFormatTest, InvalidCharactersAreReplaced) { LOG(INFO) << value; } +TYPED_TEST(WideStringLogFormatTest, + DISABLED_IsolatedHighSurrogatesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA � BBB"; + // Currently, this is "AAA \xF0\x90 BBB". + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, + DISABLED_ConsecutiveHighSurrogatesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800\xD800 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA �� BBB"; + // Currently, this is "AAA \xF0\x90\xF0\x90 BBB". 
+ + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, + DISABLED_HighHighLowSurrogateSequencesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800\xD800\xDC00 BBB"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA �𐀀 BBB"; + // Currently, this is "AAA \xF0\x90𐀀 BBB". + + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + +TYPED_TEST(WideStringLogFormatTest, + DISABLED_TrailingHighSurrogatesAreReplaced) { + absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); + + TypeParam value = L"AAA \xD800"; + // NOLINTNEXTLINE(readability/utf8) + absl::string_view utf8_value = "AAA �"; + // Currently, this is "AAA \xF0\x90". 
+ + EXPECT_CALL(test_sink, Send(AllOf(TextMessage(Eq(utf8_value)), + ENCODED_MESSAGE(HasValues(ElementsAre( + ValueWithStr(Eq(utf8_value)))))))); + + test_sink.StartCapturingLogs(); + LOG(INFO) << value; +} + TYPED_TEST(WideStringLogFormatTest, EmptyWideString) { absl::ScopedMockLog test_sink(absl::MockLogDefault::kDisallowUnexpected); From 3eb2041985790d245f1ce6ae902c9dc811cd5d69 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 9 May 2025 11:39:53 -0700 Subject: [PATCH 015/107] Avoid requiring default-constructability of iterator type in algorithms that use ContainerIterPairType This includes absl::c_minmax_element, absl::c_equal_range, and absl::mismatch PiperOrigin-RevId: 756846820 Change-Id: I39f612224a98947f5ef9e9b7e53320df0bd99ce7 --- absl/algorithm/container.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/absl/algorithm/container.h b/absl/algorithm/container.h index 913268ddad3..6f9c1938fa8 100644 --- a/absl/algorithm/container.h +++ b/absl/algorithm/container.h @@ -75,8 +75,8 @@ using ContainerIter = decltype(begin(std::declval())); // An MSVC bug involving template parameter substitution requires us to use // decltype() here instead of just std::pair. template -using ContainerIterPairType = - decltype(std::make_pair(ContainerIter(), ContainerIter())); +using ContainerIterPairType = decltype(std::make_pair( + std::declval>(), std::declval>())); template using ContainerDifferenceType = decltype(std::distance( From 83e249f2648f8e99439140b8ea4087526fe4728e Mon Sep 17 00:00:00 2001 From: Omer Mor Date: Fri, 9 May 2025 12:12:00 -0700 Subject: [PATCH 016/107] Rewrite `WideToUtf8` for improved readability. This is supposed to be a zero-diff change. 
PiperOrigin-RevId: 756859112 Change-Id: Ia81a84bc5d1e6f2a1299ca0ff5dbcec48583ab76 --- absl/strings/internal/utf8.cc | 121 ++++++++++++++++++++--------- absl/strings/internal/utf8.h | 6 +- absl/strings/internal/utf8_test.cc | 17 +++- 3 files changed, 103 insertions(+), 41 deletions(-) diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc index 4370c7c73a4..61945f5869b 100644 --- a/absl/strings/internal/utf8.cc +++ b/absl/strings/internal/utf8.cc @@ -18,6 +18,7 @@ #include #include +#include #include "absl/base/config.h" @@ -25,7 +26,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { +size_t EncodeUTF8Char(char* buffer, char32_t utf8_char) { if (utf8_char <= 0x7F) { *buffer = static_cast(utf8_char); return 1; @@ -53,45 +54,93 @@ size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { } } -size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) { - const auto v = static_cast(wc); - if (v < 0x80) { - *buf = static_cast(v); +size_t WideToUtf8(wchar_t wc, char* buf, ShiftState& s) { + // Reinterpret the output buffer `buf` as `unsigned char*` for subsequent + // bitwise operations. This ensures well-defined behavior for bit + // manipulations (avoiding issues with signed `char`) and is safe under C++ + // aliasing rules, as `unsigned char` can alias any type. + auto* ubuf = reinterpret_cast(buf); + const uint32_t v = static_cast(wc); + constexpr size_t kError = static_cast(-1); + + if (v <= 0x007F) { + // 1-byte sequence (U+0000 to U+007F). + // 0xxxxxxx. + ubuf[0] = (0b0111'1111 & v); + s = {}; // Reset surrogate state. return 1; - } else if (v < 0x800) { - *buf++ = static_cast(0xc0 | (v >> 6)); - *buf = static_cast(0x80 | (v & 0x3f)); + } else if (0x0080 <= v && v <= 0x07FF) { + // 2-byte sequence (U+0080 to U+07FF). + // 110xxxxx 10xxxxxx. 
+ ubuf[0] = 0b1100'0000 | (0b0001'1111 & (v >> 6)); + ubuf[1] = 0b1000'0000 | (0b0011'1111 & v); + s = {}; // Reset surrogate state. return 2; - } else if (v < 0xd800 || (v - 0xe000) < 0x2000) { - *buf++ = static_cast(0xe0 | (v >> 12)); - *buf++ = static_cast(0x80 | ((v >> 6) & 0x3f)); - *buf = static_cast(0x80 | (v & 0x3f)); + } else if ((0x0800 <= v && v <= 0xD7FF) || (0xE000 <= v && v <= 0xFFFF)) { + // 3-byte sequence (U+0800 to U+D7FF or U+E000 to U+FFFF). + // Excludes surrogate code points U+D800-U+DFFF. + // 1110xxxx 10xxxxxx 10xxxxxx. + ubuf[0] = 0b1110'0000 | (0b0000'1111 & (v >> 12)); + ubuf[1] = 0b1000'0000 | (0b0011'1111 & (v >> 6)); + ubuf[2] = 0b1000'0000 | (0b0011'1111 & v); + s = {}; // Reset surrogate state. return 3; - } else if ((v - 0x10000) < 0x100000) { - *buf++ = static_cast(0xf0 | (v >> 18)); - *buf++ = static_cast(0x80 | ((v >> 12) & 0x3f)); - *buf++ = static_cast(0x80 | ((v >> 6) & 0x3f)); - *buf = static_cast(0x80 | (v & 0x3f)); - return 4; - } else if (v < 0xdc00) { - s.saw_high_surrogate = true; - s.bits = static_cast(v & 0x3); - const uint8_t high_bits = ((v >> 6) & 0xf) + 1; - *buf++ = static_cast(0xf0 | (high_bits >> 2)); - *buf = - static_cast(0x80 | static_cast((high_bits & 0x3) << 4) | - static_cast((v >> 2) & 0xf)); - return 2; - } else if (v < 0xe000 && s.saw_high_surrogate) { - *buf++ = static_cast(0x80 | static_cast(s.bits << 4) | - static_cast((v >> 6) & 0xf)); - *buf = static_cast(0x80 | (v & 0x3f)); - s.saw_high_surrogate = false; - s.bits = 0; - return 2; - } else { - return static_cast(-1); + } else if (0xD800 <= v && v <= 0xDBFF) { + // High Surrogate (U+D800 to U+DBFF). + // This part forms the first two bytes of an eventual 4-byte UTF-8 sequence. + const unsigned char high_bits_val = (0b0000'1111 & (v >> 6)) + 1; + + // First byte of the 4-byte UTF-8 sequence (11110xxx). + ubuf[0] = 0b1111'0000 | (0b0000'0111 & (high_bits_val >> 2)); + // Second byte of the 4-byte UTF-8 sequence (10xxxxxx). 
+ ubuf[1] = 0b1000'0000 | // + (0b0011'0000 & (high_bits_val << 4)) | // + (0b0000'1111 & (v >> 2)); + // Set state for high surrogate after writing to buffer. + s = {true, static_cast(0b0000'0011 & v)}; + return 2; // Wrote 2 bytes, expecting 2 more from a low surrogate. + } else if (0xDC00 <= v && v <= 0xDFFF) { + // Low Surrogate (U+DC00 to U+DFFF). + // This part forms the last two bytes of a 4-byte UTF-8 sequence, + // using state from a preceding high surrogate. + if (!s.saw_high_surrogate) { + // Error: Isolated low surrogate without a preceding high surrogate. + // s remains in its current (problematic) state. + // Caller should handle error. + return kError; + } + + // Third byte of the 4-byte UTF-8 sequence (10xxxxxx). + ubuf[0] = 0b1000'0000 | // + (0b0011'0000 & (s.bits << 4)) | // + (0b0000'1111 & (v >> 6)); + // Fourth byte of the 4-byte UTF-8 sequence (10xxxxxx). + ubuf[1] = 0b1000'0000 | (0b0011'1111 & v); + + s = {}; // Reset surrogate state, pair complete. + return 2; // Wrote 2 more bytes, completing the 4-byte sequence. + } else if constexpr (0xFFFF < std::numeric_limits::max()) { + // Conditionally compile the 4-byte direct conversion branch. + // This block is compiled only if wchar_t can represent values > 0xFFFF. + // It's placed after surrogate checks to ensure surrogates are handled by + // their specific logic. This inner 'if' is the runtime check for the 4-byte + // range. At this point, v is known not to be in the 1, 2, or 3-byte BMP + // ranges, nor is it a surrogate code point. + if (0x10000 <= v && v <= 0x10FFFF) { + // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. + ubuf[0] = 0b1111'0000 | (0b0000'0111 & (v >> 18)); + ubuf[1] = 0b1000'0000 | (0b0011'1111 & (v >> 12)); + ubuf[2] = 0b1000'0000 | (0b0011'1111 & (v >> 6)); + ubuf[3] = 0b1000'0000 | (0b0011'1111 & v); + s = {}; // Reset surrogate state. + return 4; + } } + + // Invalid wchar_t value (e.g., out of Unicode range, or unhandled after all + // checks). 
+ s = {}; // Reset surrogate state. + return kError; } } // namespace strings_internal diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h index f240408db44..ed1db110a7b 100644 --- a/absl/strings/internal/utf8.h +++ b/absl/strings/internal/utf8.h @@ -41,11 +41,11 @@ namespace strings_internal { // characters into buffer, however never will more than kMaxEncodedUTF8Size // bytes be written, regardless of the value of utf8_char. enum { kMaxEncodedUTF8Size = 4 }; -size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); +size_t EncodeUTF8Char(char* buffer, char32_t utf8_char); struct ShiftState { bool saw_high_surrogate = false; - uint8_t bits = 0; + unsigned char bits = 0; }; // Converts `wc` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is @@ -55,7 +55,7 @@ struct ShiftState { // // This is basically std::wcrtomb(), but always outputting UTF-8 instead of // respecting the current locale. -size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s); +size_t WideToUtf8(wchar_t wc, char* buf, ShiftState& s); } // namespace strings_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc index 62322dd1903..b88d7bb88a4 100644 --- a/absl/strings/internal/utf8_test.cc +++ b/absl/strings/internal/utf8_test.cc @@ -103,8 +103,21 @@ std::vector GetWideToUtf8TestCases() { {"BMP_MaxBeforeSurrogates_D7FF", L'\uD7FF', "\xED\x9F\xBF", 3}, {"BMP_FFFF", L'\uFFFF', "\xEF\xBF\xBF", 3}, - {"IsolatedHighSurr_D800", L'\xD800', "\xF0\x90", 2, {true, 0}, {true, 0}}, - {"IsolatedHighSurr_DBFF", L'\xDBFF', "\xF4\x8F", 2, {true, 3}, {true, 3}}, + {"IsolatedHighSurr_D800", L'\xD800', "\xF0\x90", 2, {}, {true, 0}}, + {"IsolatedHighSurr_DBFF", L'\xDBFF', "\xF4\x8F", 2, {}, {true, 3}}, + + {"HighSurr_D800_after_HighD800", + L'\xD800', + "\xF0\x90", + 2, + {true, 0}, + {true, 0}}, + {"HighSurr_DBFF_after_HighDBFF", + L'\xDBFF', + "\xF4\x8F", + 2, + {true, 3}, + {true, 3}}, {"LowSurr_DC00_after_HighD800", L'\xDC00', 
"\x80\x80", 2, {true, 0}, {}}, {"LowSurr_DFFD_after_HighDBFF", L'\xDFFD', "\xBF\xBD", 2, {true, 3}, {}}, From 4bf37d8e19bbea7e2ac4cd15d85615d6c803573e Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Fri, 9 May 2025 13:29:43 -0700 Subject: [PATCH 017/107] Fix a bug of casting sizeof(slot_type) to uint16_t instead of uint32_t. Note that in the above static_assert, we check that sizeof(slot_type) fits in a uint32_t, not a uint16_t and the field in PolicyFunctions is a uint32_t. PiperOrigin-RevId: 756886567 Change-Id: I5ca915b814d1da6cdc1d24152b2ebde5dec9d28b --- absl/container/internal/raw_hash_set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 3bc86d19dec..3effc441ae1 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -3607,7 +3607,7 @@ class raw_hash_set { static constexpr PolicyFunctions value = { static_cast(sizeof(key_type)), static_cast(sizeof(value_type)), - static_cast(sizeof(slot_type)), + static_cast(sizeof(slot_type)), static_cast(alignof(slot_type)), SooEnabled(), ShouldSampleHashtablezInfoForAlloc(), // TODO(b/328722020): try to type erase From 9a89ea8714c06aee702058c6acc094bff02dfd42 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 9 May 2025 14:29:01 -0700 Subject: [PATCH 018/107] Import of CCTZ from GitHub. 
PiperOrigin-RevId: 756908046 Change-Id: I4db2b90fd1f6097f582b90c6aa82cdc4704d8b66 --- .../internal/cctz/src/time_zone_lookup.cc | 197 ++++++++---------- 1 file changed, 87 insertions(+), 110 deletions(-) diff --git a/absl/time/internal/cctz/src/time_zone_lookup.cc b/absl/time/internal/cctz/src/time_zone_lookup.cc index 80f73199ff4..f791797c1ee 100644 --- a/absl/time/internal/cctz/src/time_zone_lookup.cc +++ b/absl/time/internal/cctz/src/time_zone_lookup.cc @@ -33,23 +33,29 @@ #endif #if defined(_WIN32) -#include -// Include only when the SDK is for Windows 10 (and later), and the binary is -// targeted for Windows XP and later. -// Note: The Windows SDK added windows.globalization.h file for Windows 10, but -// MinGW did not add it until NTDDI_WIN10_NI (SDK version 10.0.22621.0). -#if ((defined(_WIN32_WINNT_WIN10) && !defined(__MINGW32__)) || \ - (defined(NTDDI_WIN10_NI) && NTDDI_VERSION >= NTDDI_WIN10_NI)) && \ - (_WIN32_WINNT >= _WIN32_WINNT_WINXP) +// Include only when is available. +// https://learn.microsoft.com/en-us/windows/win32/intl/international-components-for-unicode--icu- +// https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=105255 +#if defined(__has_include) +#if __has_include() #define USE_WIN32_LOCAL_TIME_ZONE -#include -#include -#include -#include #include -#include -#endif -#endif +#pragma push_macro("_WIN32_WINNT") +#pragma push_macro("NTDDI_VERSION") +// Minimum _WIN32_WINNT and NTDDI_VERSION to use ucal_getTimeZoneIDForWindowsID +#undef _WIN32_WINNT +#define _WIN32_WINNT 0x0A00 // == _WIN32_WINNT_WIN10 +#undef NTDDI_VERSION +#define NTDDI_VERSION 0x0A000004 // == NTDDI_WIN10_RS3 +#include +#pragma pop_macro("NTDDI_VERSION") +#pragma pop_macro("_WIN32_WINNT") +#include + +#include +#endif // __has_include() +#endif // __has_include +#endif // _WIN32 #include #include @@ -65,80 +71,78 @@ namespace cctz { namespace { #if defined(USE_WIN32_LOCAL_TIME_ZONE) -// Calls the WinRT Calendar.GetTimeZone method to obtain the IANA ID of the -// 
local time zone. Returns an empty vector in case of an error. -std::string win32_local_time_zone(const HMODULE combase) { - std::string result; - const auto ro_activate_instance = - reinterpret_cast( - GetProcAddress(combase, "RoActivateInstance")); - if (!ro_activate_instance) { - return result; - } - const auto windows_create_string_reference = - reinterpret_cast( - GetProcAddress(combase, "WindowsCreateStringReference")); - if (!windows_create_string_reference) { - return result; - } - const auto windows_delete_string = - reinterpret_cast( - GetProcAddress(combase, "WindowsDeleteString")); - if (!windows_delete_string) { - return result; - } - const auto windows_get_string_raw_buffer = - reinterpret_cast( - GetProcAddress(combase, "WindowsGetStringRawBuffer")); - if (!windows_get_string_raw_buffer) { - return result; +// True if we have already failed to load the API. +static std::atomic_bool g_ucal_getTimeZoneIDForWindowsIDUnavailable; +static std::atomic + g_ucal_getTimeZoneIDForWindowsIDRef; + +std::string win32_local_time_zone() { + // If we have already failed to load the API, then just give up. + if (g_ucal_getTimeZoneIDForWindowsIDUnavailable.load()) { + return ""; } - // The string returned by WindowsCreateStringReference doesn't need to be - // deleted. - HSTRING calendar_class_id; - HSTRING_HEADER calendar_class_id_header; - HRESULT hr = windows_create_string_reference( - RuntimeClass_Windows_Globalization_Calendar, - sizeof(RuntimeClass_Windows_Globalization_Calendar) / sizeof(wchar_t) - 1, - &calendar_class_id_header, &calendar_class_id); - if (FAILED(hr)) { - return result; - } + auto ucal_getTimeZoneIDForWindowsIDFunc = + g_ucal_getTimeZoneIDForWindowsIDRef.load(); + if (ucal_getTimeZoneIDForWindowsIDFunc == nullptr) { + // If we have already failed to load the API, then just give up. 
+ if (g_ucal_getTimeZoneIDForWindowsIDUnavailable.load()) { + return ""; + } - IInspectable* calendar; - hr = ro_activate_instance(calendar_class_id, &calendar); - if (FAILED(hr)) { - return result; + const HMODULE icudll = + ::LoadLibraryExW(L"icu.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32); + + if (icudll == nullptr) { + g_ucal_getTimeZoneIDForWindowsIDUnavailable.store(true); + return ""; + } + + ucal_getTimeZoneIDForWindowsIDFunc = + reinterpret_cast( + ::GetProcAddress(icudll, "ucal_getTimeZoneIDForWindowsID")); + + if (ucal_getTimeZoneIDForWindowsIDFunc == nullptr) { + g_ucal_getTimeZoneIDForWindowsIDUnavailable.store(true); + return ""; + } + // store-race is not a problem here, because ::GetProcAddress() returns the + // same address for the same function in the same DLL. + g_ucal_getTimeZoneIDForWindowsIDRef.store( + ucal_getTimeZoneIDForWindowsIDFunc); + + // We intentionally do not call ::FreeLibrary() here to avoid frequent DLL + // loadings and unloading. As "icu.dll" is a system library, keeping it on + // memory is supposed to have no major drawback. 
} - ABI::Windows::Globalization::ITimeZoneOnCalendar* time_zone; - hr = calendar->QueryInterface(IID_PPV_ARGS(&time_zone)); - if (FAILED(hr)) { - calendar->Release(); - return result; + DYNAMIC_TIME_ZONE_INFORMATION info = {}; + if (::GetDynamicTimeZoneInformation(&info) == TIME_ZONE_ID_INVALID) { + return ""; } - HSTRING tz_hstr; - hr = time_zone->GetTimeZone(&tz_hstr); - if (SUCCEEDED(hr)) { - UINT32 wlen; - const PCWSTR tz_wstr = windows_get_string_raw_buffer(tz_hstr, &wlen); - if (tz_wstr) { - const int size = - WideCharToMultiByte(CP_UTF8, 0, tz_wstr, static_cast(wlen), - nullptr, 0, nullptr, nullptr); - result.resize(static_cast(size)); - WideCharToMultiByte(CP_UTF8, 0, tz_wstr, static_cast(wlen), - &result[0], size, nullptr, nullptr); - } - windows_delete_string(tz_hstr); + UChar buffer[128]; + UErrorCode status = U_ZERO_ERROR; + const auto num_chars_in_buffer = ucal_getTimeZoneIDForWindowsIDFunc( + reinterpret_cast(info.TimeZoneKeyName), -1, nullptr, buffer, + ARRAYSIZE(buffer), &status); + if (status != U_ZERO_ERROR || num_chars_in_buffer <= 0 || + num_chars_in_buffer > ARRAYSIZE(buffer)) { + return ""; } - time_zone->Release(); - calendar->Release(); - return result; + + const int num_bytes_in_utf8 = ::WideCharToMultiByte( + CP_UTF8, 0, reinterpret_cast(buffer), + static_cast(num_chars_in_buffer), nullptr, 0, nullptr, nullptr); + std::string local_time_str; + local_time_str.resize(static_cast(num_bytes_in_utf8)); + ::WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast(buffer), + static_cast(num_chars_in_buffer), + &local_time_str[0], num_bytes_in_utf8, nullptr, + nullptr); + return local_time_str; } -#endif +#endif // USE_WIN32_LOCAL_TIME_ZONE } // namespace std::string time_zone::name() const { return effective_impl().Name(); } @@ -256,36 +260,9 @@ time_zone local_time_zone() { } #endif #if defined(USE_WIN32_LOCAL_TIME_ZONE) - // Use the WinRT Calendar class to get the local time zone. This feature is - // available on Windows 10 and later. 
The library is dynamically linked to - // maintain binary compatibility with Windows XP - Windows 7. On Windows 8, - // The combase.dll API functions are available but the RoActivateInstance - // call will fail for the Calendar class. - std::string winrt_tz; - const HMODULE combase = - LoadLibraryEx(_T("combase.dll"), nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32); - if (combase) { - const auto ro_initialize = reinterpret_cast( - GetProcAddress(combase, "RoInitialize")); - const auto ro_uninitialize = reinterpret_cast( - GetProcAddress(combase, "RoUninitialize")); - if (ro_initialize && ro_uninitialize) { - const HRESULT hr = ro_initialize(RO_INIT_MULTITHREADED); - // RPC_E_CHANGED_MODE means that a previous RoInitialize call specified - // a different concurrency model. The WinRT runtime is initialized and - // should work for our purpose here, but we should *not* call - // RoUninitialize because it's a failure. - if (SUCCEEDED(hr) || hr == RPC_E_CHANGED_MODE) { - winrt_tz = win32_local_time_zone(combase); - if (SUCCEEDED(hr)) { - ro_uninitialize(); - } - } - } - FreeLibrary(combase); - } - if (!winrt_tz.empty()) { - zone = winrt_tz.c_str(); + std::string win32_tz = win32_local_time_zone(); + if (!win32_tz.empty()) { + zone = win32_tz.c_str(); } #endif From 80b767438f16c21482b40a4028317988b623e652 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Mon, 12 May 2025 13:43:47 -0700 Subject: [PATCH 019/107] Remove relocatability test that is no longer useful PiperOrigin-RevId: 757895298 Change-Id: Id0a981b11499abc1471fe70589cfd5dfa13a9c2b --- absl/container/flat_hash_map_test.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/absl/container/flat_hash_map_test.cc b/absl/container/flat_hash_map_test.cc index 5c83c94136f..e1d9382a231 100644 --- a/absl/container/flat_hash_map_test.cc +++ b/absl/container/flat_hash_map_test.cc @@ -116,15 +116,6 @@ TEST(FlatHashMap, StandardLayout) { TEST(FlatHashMap, Relocatability) { static_assert(absl::is_trivially_relocatable::value); 
-#if ABSL_INTERNAL_CPLUSPLUS_LANG <= 202002L - // std::pair is not trivially copyable in C++23 in some standard - // library versions. - // See https://github.com/llvm/llvm-project/pull/95444 for instance. - // container_memory.h contains a workaround so what really matters - // is the transfer test below. - static_assert( - absl::is_trivially_relocatable>::value); -#endif static_assert( std::is_same::transfer>(nullptr, From 97680124250a74885318ef704202d0252f46a843 Mon Sep 17 00:00:00 2001 From: Thomas Schenker Date: Mon, 12 May 2025 13:46:01 -0700 Subject: [PATCH 020/107] PR #1884: Remove duplicate dependency Imported from GitHub PR https://github.com/abseil/abseil-cpp/pull/1884 Remove a duplicated dependency to `absl::tracing_internal`. Merge 6805baed9629e8461e4487acb6f51fc2ab9c1f87 into 9a89ea8714c06aee702058c6acc094bff02dfd42 Merging this change closes #1884 COPYBARA_INTEGRATE_REVIEW=https://github.com/abseil/abseil-cpp/pull/1884 from schenker:delete-duplicate-dependency 6805baed9629e8461e4487acb6f51fc2ab9c1f87 PiperOrigin-RevId: 757896259 Change-Id: I3eecc788e2ec995bd1320345749b5fe4046cbea4 --- absl/synchronization/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/absl/synchronization/CMakeLists.txt b/absl/synchronization/CMakeLists.txt index 9d4844d0a5b..ad45515c115 100644 --- a/absl/synchronization/CMakeLists.txt +++ b/absl/synchronization/CMakeLists.txt @@ -113,7 +113,6 @@ absl_cc_library( absl::raw_logging_internal absl::stacktrace absl::symbolize - absl::tracing_internal absl::time absl::tracing_internal Threads::Threads From 309adff2a5c2796b1678556080daf9d3629f43f9 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 12 May 2025 13:56:43 -0700 Subject: [PATCH 021/107] Delete deprecated `absl::Cord::Get` and its remaining call sites. 
PiperOrigin-RevId: 757900896 Change-Id: I0b56fcbcbd82d2decd1b59676a7af55d868be5a4 --- absl/strings/cord.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 7afa419a685..5aa232e7be4 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -755,7 +755,7 @@ class Cord { // NOTE: This routine is reasonably efficient. It is roughly // logarithmic based on the number of chunks that make up the cord. Still, // if you need to iterate over the contents of a cord, you should - // use a CharIterator/ChunkIterator rather than call operator[] or Get() + // use a CharIterator/ChunkIterator rather than call operator[] // repeatedly in a loop. char operator[](size_t i) const; From 9109163f1c3c2058d3a9f36665bb0dc0c747bc07 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 12 May 2025 15:21:16 -0700 Subject: [PATCH 022/107] Simplify MixingHashState::Read9To16 to not depend on endianness. PiperOrigin-RevId: 757933213 Change-Id: Ia4ce9f196e8098931c748600997b977811bb7e85 --- absl/hash/internal/hash.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 63b35490b21..7c90ab4dfef 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -1252,20 +1252,11 @@ class ABSL_DLL MixingHashState : public HashStateBase { size_t len); // Reads 9 to 16 bytes from p. - // The least significant 8 bytes are in .first, and the rest of the bytes are - // in .second along with duplicated bytes from .first if len<16. + // The first 8 bytes are in .first, and the rest of the bytes are in .second + // along with duplicated bytes from .first if len<16. 
static std::pair Read9To16(const unsigned char* p, size_t len) { - uint64_t low_mem = Read8(p); - uint64_t high_mem = Read8(p + len - 8); -#ifdef ABSL_IS_LITTLE_ENDIAN - uint64_t most_significant = high_mem; - uint64_t least_significant = low_mem; -#else - uint64_t most_significant = low_mem; - uint64_t least_significant = high_mem; -#endif - return {least_significant, most_significant}; + return {Read8(p), Read8(p + len - 8)}; } // Reads 8 bytes from p. From 9c8e8f948bc9bf13388d8ea2afdaadb311a71161 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 12 May 2025 15:24:46 -0700 Subject: [PATCH 023/107] Import of CCTZ from GitHub. PiperOrigin-RevId: 757934622 Change-Id: Ie22189d81f8432428d64847cf422aae5d9437105 --- .../internal/cctz/src/time_zone_lookup.cc | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/absl/time/internal/cctz/src/time_zone_lookup.cc b/absl/time/internal/cctz/src/time_zone_lookup.cc index f791797c1ee..e8f1d9307b4 100644 --- a/absl/time/internal/cctz/src/time_zone_lookup.cc +++ b/absl/time/internal/cctz/src/time_zone_lookup.cc @@ -57,6 +57,8 @@ #endif // __has_include #endif // _WIN32 +#include +#include #include #include #include @@ -121,25 +123,25 @@ std::string win32_local_time_zone() { return ""; } - UChar buffer[128]; + std::array buffer; UErrorCode status = U_ZERO_ERROR; const auto num_chars_in_buffer = ucal_getTimeZoneIDForWindowsIDFunc( - reinterpret_cast(info.TimeZoneKeyName), -1, nullptr, buffer, - ARRAYSIZE(buffer), &status); + reinterpret_cast(info.TimeZoneKeyName), -1, nullptr, + buffer.data(), static_cast(buffer.size()), &status); if (status != U_ZERO_ERROR || num_chars_in_buffer <= 0 || - num_chars_in_buffer > ARRAYSIZE(buffer)) { + num_chars_in_buffer > static_cast(buffer.size())) { return ""; } const int num_bytes_in_utf8 = ::WideCharToMultiByte( - CP_UTF8, 0, reinterpret_cast(buffer), + CP_UTF8, 0, reinterpret_cast(buffer.data()), static_cast(num_chars_in_buffer), nullptr, 0, nullptr, 
nullptr); std::string local_time_str; local_time_str.resize(static_cast(num_bytes_in_utf8)); - ::WideCharToMultiByte(CP_UTF8, 0, reinterpret_cast(buffer), - static_cast(num_chars_in_buffer), - &local_time_str[0], num_bytes_in_utf8, nullptr, - nullptr); + ::WideCharToMultiByte( + CP_UTF8, 0, reinterpret_cast(buffer.data()), + static_cast(num_chars_in_buffer), &local_time_str[0], + num_bytes_in_utf8, nullptr, nullptr); return local_time_str; } #endif // USE_WIN32_LOCAL_TIME_ZONE From d55845e6c119b000b316c05a3162b1b9bfb78f3e Mon Sep 17 00:00:00 2001 From: Wiktor Garbacz Date: Tue, 13 May 2025 03:48:50 -0700 Subject: [PATCH 024/107] stacktrace_x86: Handle nested signals on altstack PiperOrigin-RevId: 758147101 Change-Id: I569e095b4820862a7ebba68bd22ab205f43693e2 --- .../debugging/internal/stacktrace_x86-inl.inc | 11 +-- absl/debugging/stacktrace_test.cc | 73 +++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/absl/debugging/internal/stacktrace_x86-inl.inc b/absl/debugging/internal/stacktrace_x86-inl.inc index 96b128e04ea..bf6e5abaa6b 100644 --- a/absl/debugging/internal/stacktrace_x86-inl.inc +++ b/absl/debugging/internal/stacktrace_x86-inl.inc @@ -261,17 +261,18 @@ static void **NextStackFrame(void **old_fp, const void *uc, // it's supposed to. if (STRICT_UNWINDING && (!WITH_CONTEXT || uc == nullptr || new_fp_u != GetFP(uc))) { - // With the stack growing downwards, older stack frame must be - // at a greater address that the current one. - if (new_fp_u <= old_fp_u) return nullptr; - + // With the stack growing downwards, older stack frame should be + // at a greater address that the current one. However if we get multiple + // signals handled on altstack the new frame pointer might return to the + // main stack, but be different than the value from the most recent + // ucontext. 
// If we get a very large frame size, it may be an indication that we // guessed frame pointers incorrectly and now risk a paging fault // dereferencing a wrong frame pointer. Or maybe not because large frames // are possible as well. The main stack is assumed to be readable, // so we assume the large frame is legit if we know the real stack bounds // and are within the stack. - if (new_fp_u - old_fp_u > kMaxFrameBytes) { + if (new_fp_u <= old_fp_u || new_fp_u - old_fp_u > kMaxFrameBytes) { if (stack_high < kUnknownStackEnd && static_cast(getpagesize()) < stack_low) { // Stack bounds are known. diff --git a/absl/debugging/stacktrace_test.cc b/absl/debugging/stacktrace_test.cc index 4477d84c1df..e5565c1f70e 100644 --- a/absl/debugging/stacktrace_test.cc +++ b/absl/debugging/stacktrace_test.cc @@ -18,6 +18,9 @@ #include #include +#include +#include +#include #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -295,4 +298,74 @@ TEST(StackTrace, CanonicalFrameAddresses) { } #endif +// This test is Linux specific. +#if defined(__linux__) +const void* g_return_address = nullptr; +bool g_sigusr2_raised = false; + +void SigUsr2Handler(int, siginfo_t*, void* uc) { + // Many platforms don't support this by default. + bool support_is_expected = false; + constexpr int kMaxStackDepth = 64; + void* result[kMaxStackDepth]; + int depth = + absl::GetStackTraceWithContext(result, kMaxStackDepth, 0, uc, nullptr); + // Verify we can unwind past the nested signal handlers. 
+ if (support_is_expected) { + EXPECT_THAT(absl::MakeSpan(result, static_cast(depth)), + Contains(g_return_address).Times(1)); + } + depth = absl::GetStackTrace(result, kMaxStackDepth, 0); + if (support_is_expected) { + EXPECT_THAT(absl::MakeSpan(result, static_cast(depth)), + Contains(g_return_address).Times(1)); + } + g_sigusr2_raised = true; +} + +void SigUsr1Handler(int, siginfo_t*, void*) { + raise(SIGUSR2); + ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); +} + +ABSL_ATTRIBUTE_NOINLINE void RaiseSignal() { + g_return_address = __builtin_return_address(0); + raise(SIGUSR1); + ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); +} + +ABSL_ATTRIBUTE_NOINLINE void TestNestedSignal() { + constexpr size_t kAltstackSize = 1 << 14; + // Allocate altstack on regular stack to make sure it'll have a higher + // address than some of the regular stack frames. + char space[kAltstackSize]; + stack_t altstack; + stack_t old_stack; + altstack.ss_sp = space; + altstack.ss_size = kAltstackSize; + altstack.ss_flags = 0; + ASSERT_EQ(sigaltstack(&altstack, &old_stack), 0) << strerror(errno); + struct sigaction act; + struct sigaction oldusr1act; + struct sigaction oldusr2act; + act.sa_sigaction = SigUsr1Handler; + act.sa_flags = SA_SIGINFO | SA_ONSTACK; + sigemptyset(&act.sa_mask); + ASSERT_EQ(sigaction(SIGUSR1, &act, &oldusr1act), 0) << strerror(errno); + act.sa_sigaction = SigUsr2Handler; + ASSERT_EQ(sigaction(SIGUSR2, &act, &oldusr2act), 0) << strerror(errno); + RaiseSignal(); + ASSERT_EQ(sigaltstack(&old_stack, nullptr), 0) << strerror(errno); + ASSERT_EQ(sigaction(SIGUSR1, &oldusr1act, nullptr), 0) << strerror(errno); + ASSERT_EQ(sigaction(SIGUSR2, &oldusr2act, nullptr), 0) << strerror(errno); + ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); +} + +TEST(StackTrace, NestedSignal) { + // Verify we can unwind past the nested signal handlers. 
+ TestNestedSignal(); + EXPECT_TRUE(g_sigusr2_raised); +} +#endif + } // namespace From 30eea9b0d0a65c4e4630044149c1a3c9772f5562 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 13 May 2025 09:34:34 -0700 Subject: [PATCH 025/107] Exclude SWIG from ABSL_DEPRECATED and ABSL_DEPRECATE_AND_INLINE PiperOrigin-RevId: 758254733 Change-Id: I3f60d8117e4a37452409e28e706d677ef258c849 --- absl/base/macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/absl/base/macros.h b/absl/base/macros.h index ff89944ae4c..f9acdc8ca9e 100644 --- a/absl/base/macros.h +++ b/absl/base/macros.h @@ -197,9 +197,9 @@ ABSL_NAMESPACE_END // While open-source users do not have access to this service, the macro is // provided for compatibility, and so that users receive deprecation warnings. #if ABSL_HAVE_CPP_ATTRIBUTE(deprecated) && \ - ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) + ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) && !defined(SWIG) #define ABSL_DEPRECATE_AND_INLINE() [[deprecated, clang::annotate("inline-me")]] -#elif ABSL_HAVE_CPP_ATTRIBUTE(deprecated) +#elif ABSL_HAVE_CPP_ATTRIBUTE(deprecated) && !defined(SWIG) #define ABSL_DEPRECATE_AND_INLINE() [[deprecated]] #else #define ABSL_DEPRECATE_AND_INLINE() From e94af2deb9c935a710f4aa377605d31ba78ffa55 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 13 May 2025 12:41:08 -0700 Subject: [PATCH 026/107] Add comment explaining math behind expressions. PiperOrigin-RevId: 758333843 Change-Id: Icca99ea0d88caa6907be8f22ff23df18a2992892 --- absl/container/internal/raw_hash_set.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 3effc441ae1..85583742ac9 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1223,6 +1223,10 @@ constexpr size_t SizeToCapacity(size_t size) { // NormalizeCapacity(size). 
int leading_zeros = absl::countl_zero(size); constexpr size_t kLast3Bits = size_t{7} << (sizeof(size_t) * 8 - 3); + // max_size_for_next_capacity = max_load_factor * next_capacity + // = (7/8) * (~size_t{} >> leading_zeros) + // = (7/8*~size_t{}) >> leading_zeros + // = kLast3Bits >> leading_zeros size_t max_size_for_next_capacity = kLast3Bits >> leading_zeros; // Decrease shift if size is too big for the minimum capacity. leading_zeros -= static_cast(size > max_size_for_next_capacity); From ebb23039de27a464e395092c90208f7bcc5f397e Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 13 May 2025 13:57:47 -0700 Subject: [PATCH 027/107] Remove the "small" size designation for thread_identity_test, which causes the test to timeout after 60s. Motivation: this test times out flakily in some build configurations. PiperOrigin-RevId: 758364551 Change-Id: Ic988ec988ddb51439875cba30b137348c59457ee --- absl/base/BUILD.bazel | 1 - 1 file changed, 1 deletion(-) diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel index ef97b4ee8b2..3724ad11df6 100644 --- a/absl/base/BUILD.bazel +++ b/absl/base/BUILD.bazel @@ -685,7 +685,6 @@ cc_test( cc_test( name = "thread_identity_test", - size = "small", srcs = ["internal/thread_identity_test.cc"], copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, From bf9816208252b60a463ae712603ddbc72d0eb5d7 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 13 May 2025 14:19:02 -0700 Subject: [PATCH 028/107] In SwissTable, don't hash the key when capacity<=1 on insertions. This is applying SOO-like hashing behavior for small non-SOO-enabled tables. Note that now we don't use the control bytes for capacity==1 tables, but we still allocate them. I left not allocating the control bytes in such cases as a followup. 
PiperOrigin-RevId: 758373480 Change-Id: I152b313b7d770eb10fd6d175b5d9a07801274436 --- absl/container/internal/raw_hash_set.cc | 333 ++++++++++--------- absl/container/internal/raw_hash_set.h | 237 +++++++------ absl/container/internal/raw_hash_set_test.cc | 71 +++- 3 files changed, 372 insertions(+), 269 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 339e662d012..cea225e0dbb 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -118,6 +118,14 @@ size_t SingleGroupTableH1(size_t hash, PerTableSeed seed) { return hash ^ seed.seed(); } +// Returns the offset of the new element after resize from capacity 1 to 3. +size_t Resize1To3NewOffset(size_t hash, PerTableSeed seed) { + // After resize from capacity 1 to 3, we always have exactly the slot with + // index 1 occupied, so we need to insert either at index 0 or index 2. + static_assert(SooSlotIndex() == 1); + return SingleGroupTableH1(hash, seed) & 2; +} + // Returns the address of the slot `i` iterations after `slot` assuming each // slot has the specified size. inline void* NextSlot(void* slot, size_t slot_size, size_t i = 1) { @@ -175,35 +183,36 @@ FindInfo find_first_non_full_from_h1(const ctrl_t* ctrl, size_t h1, } } -// Whether a table is "small". A small table fits entirely into a probing -// group, i.e., has a capacity < `Group::kWidth`. +// Whether a table fits in half a group. A half-group table fits entirely into a +// probing group, i.e., has a capacity < `Group::kWidth`. // -// In small mode we are able to use the whole capacity. The extra control +// In half-group mode we are able to use the whole capacity. The extra control // bytes give us at least one "empty" control byte to stop the iteration. // This is important to make 1 a valid capacity. 
// -// In small mode only the first `capacity` control bytes after the sentinel +// In half-group mode only the first `capacity` control bytes after the sentinel // are valid. The rest contain dummy ctrl_t::kEmpty values that do not // represent a real slot. -constexpr bool is_small(size_t capacity) { +constexpr bool is_half_group(size_t capacity) { return capacity < Group::kWidth - 1; } template void IterateOverFullSlotsImpl(const CommonFields& c, size_t slot_size, Fn cb) { const size_t cap = c.capacity(); + ABSL_SWISSTABLE_ASSERT(!IsSmallCapacity(cap)); const ctrl_t* ctrl = c.control(); void* slot = c.slot_array(); - if (is_small(cap)) { - // Mirrored/cloned control bytes in small table are also located in the + if (is_half_group(cap)) { + // Mirrored/cloned control bytes in half-group table are also located in the // first group (starting from position 0). We are taking group from position // `capacity` in order to avoid duplicates. - // Small tables capacity fits into portable group, where + // Half-group tables capacity fits into portable group, where // GroupPortableImpl::MaskFull is more efficient for the // capacity <= GroupPortableImpl::kWidth. ABSL_SWISSTABLE_ASSERT(cap <= GroupPortableImpl::kWidth && - "unexpectedly large small capacity"); + "unexpectedly large half-group capacity"); static_assert(Group::kWidth >= GroupPortableImpl::kWidth, "unexpected group width"); // Group starts from kSentinel slot, so indices in the mask will @@ -410,7 +419,7 @@ size_t DropDeletesWithoutResizeAndPrepareInsert( return find_info.offset; } -static bool WasNeverFull(CommonFields& c, size_t index) { +bool WasNeverFull(CommonFields& c, size_t index) { if (is_single_group(c.capacity())) { return true; } @@ -449,38 +458,11 @@ void ResetCtrl(CommonFields& common, size_t slot_size) { SanitizerPoisonMemoryRegion(common.slot_array(), slot_size * capacity); } -// Initializes control bytes for single element table. -// Capacity of the table must be 1. 
-ABSL_ATTRIBUTE_ALWAYS_INLINE inline void InitializeSingleElementControlBytes( - uint64_t h2, ctrl_t* new_ctrl) { - static constexpr uint64_t kEmptyXorSentinel = - static_cast(ctrl_t::kEmpty) ^ - static_cast(ctrl_t::kSentinel); - static constexpr uint64_t kEmpty64 = static_cast(ctrl_t::kEmpty); - // The first 8 bytes, where present slot positions are replaced with 0. - static constexpr uint64_t kFirstCtrlBytesWithZeroes = - k8EmptyBytes ^ kEmpty64 ^ (kEmptyXorSentinel << 8) ^ (kEmpty64 << 16); - - // Fill the original 0th and mirrored 2nd bytes with the hash. - // Result will look like: - // HSHEEEEE - // Where H = h2, E = kEmpty, S = kSentinel. - const uint64_t first_ctrl_bytes = - (h2 | kFirstCtrlBytesWithZeroes) | (h2 << 16); - // Fill last bytes with kEmpty. - std::memset(new_ctrl + 1, static_cast(ctrl_t::kEmpty), Group::kWidth); - // Overwrite the first 3 bytes with HSH. Other bytes will not be changed. - absl::little_endian::Store64(new_ctrl, first_ctrl_bytes); -} - -// Initializes control bytes for growing after SOO to the next capacity. -// `soo_ctrl` is placed in the position `SooSlotIndex()`. -// `new_hash` is placed in the position `new_offset`. -// The table must be non-empty SOO. -ABSL_ATTRIBUTE_ALWAYS_INLINE inline void -InitializeThreeElementsControlBytesAfterSoo(ctrl_t soo_ctrl, size_t new_hash, - size_t new_offset, - ctrl_t* new_ctrl) { +// Initializes control bytes for growing from capacity 1 to 3. +// `orig_h2` is placed in the position `SooSlotIndex()`. +// `new_h2` is placed in the position `new_offset`. 
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline void InitializeThreeElementsControlBytes( + h2_t orig_h2, h2_t new_h2, size_t new_offset, ctrl_t* new_ctrl) { static constexpr size_t kNewCapacity = NextCapacity(SooCapacity()); static_assert(kNewCapacity == 3); static_assert(is_single_group(kNewCapacity)); @@ -501,9 +483,9 @@ InitializeThreeElementsControlBytesAfterSoo(ctrl_t soo_ctrl, size_t new_hash, (kEmptyXorSentinel << (8 * kNewCapacity)) ^ (kEmpty64 << (8 * kMirroredSooSlotIndex)); - const uint64_t soo_h2 = static_cast(soo_ctrl); - const uint64_t new_h2_xor_empty = static_cast( - H2(new_hash) ^ static_cast(ctrl_t::kEmpty)); + const uint64_t soo_h2 = static_cast(orig_h2); + const uint64_t new_h2_xor_empty = + static_cast(new_h2 ^ static_cast(ctrl_t::kEmpty)); // Fill the original and mirrored bytes for SOO slot. // Result will look like: // EHESEHEE @@ -550,6 +532,12 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { c.decrement_size(); c.infoz().RecordErase(); + if (c.is_small()) { + SanitizerPoisonMemoryRegion(c.slot_array(), slot_size); + c.growth_info().OverwriteFullAsEmpty(); + return; + } + if (WasNeverFull(c, index)) { SetCtrl(c, index, ctrl_t::kEmpty, slot_size); c.growth_info().OverwriteFullAsEmpty(); @@ -608,6 +596,10 @@ size_t FindNewPositionsAndTransferSlots( slot, 1); return target.probe_length; }; + if (old_capacity == 1) { + if (common.size() == 1) insert_slot(old_slots); + return 0; + } size_t total_probe_length = 0; for (size_t i = 0; i < old_capacity; ++i) { if (IsFull(old_ctrl[i])) { @@ -618,6 +610,27 @@ size_t FindNewPositionsAndTransferSlots( return total_probe_length; } +struct BackingArrayPtrs { + ctrl_t* ctrl; + void* slots; +}; + +BackingArrayPtrs AllocBackingArray(CommonFields& common, + const PolicyFunctions& __restrict policy, + size_t new_capacity, bool has_infoz, + void* alloc) { + RawHashSetLayout layout(new_capacity, policy.slot_size, policy.slot_align, + has_infoz); + char* mem = static_cast(policy.alloc(alloc, 
layout.alloc_size())); + const GenerationType old_generation = common.generation(); + common.set_generation_ptr( + reinterpret_cast(mem + layout.generation_offset())); + common.set_generation(NextGeneration(old_generation)); + + return {reinterpret_cast(mem + layout.control_offset()), + mem + layout.slot_offset()}; +} + template void ResizeNonSooImpl(CommonFields& common, const PolicyFunctions& __restrict policy, @@ -632,19 +645,13 @@ void ResizeNonSooImpl(CommonFields& common, const size_t slot_size = policy.slot_size; const size_t slot_align = policy.slot_align; const bool has_infoz = infoz.IsSampled(); - - common.set_capacity(new_capacity); - RawHashSetLayout layout(new_capacity, slot_size, slot_align, has_infoz); void* alloc = policy.get_char_alloc(common); - char* mem = static_cast(policy.alloc(alloc, layout.alloc_size())); - const GenerationType old_generation = common.generation(); - common.set_generation_ptr( - reinterpret_cast(mem + layout.generation_offset())); - common.set_generation(NextGeneration(old_generation)); - ctrl_t* new_ctrl = reinterpret_cast(mem + layout.control_offset()); + common.set_capacity(new_capacity); + const auto [new_ctrl, new_slots] = + AllocBackingArray(common, policy, new_capacity, has_infoz, alloc); common.set_control(new_ctrl); - common.set_slots(mem + layout.slot_offset()); + common.set_slots(new_slots); size_t total_probe_length = 0; ResetCtrl(common, slot_size); @@ -739,7 +746,7 @@ void ResizeFullSooTable(CommonFields& common, ResizeFullSooTableSamplingMode sampling_mode) { AssertFullSoo(common, policy); const size_t slot_size = policy.slot_size; - const size_t slot_align = policy.slot_align; + void* alloc = policy.get_char_alloc(common); HashtablezInfoHandle infoz; if (sampling_mode == @@ -758,18 +765,10 @@ void ResizeFullSooTable(CommonFields& common, common.set_capacity(new_capacity); - RawHashSetLayout layout(new_capacity, slot_size, slot_align, has_infoz); - void* alloc = policy.get_char_alloc(common); - char* mem = 
static_cast(policy.alloc(alloc, layout.alloc_size())); - const GenerationType old_generation = common.generation(); - common.set_generation_ptr( - reinterpret_cast(mem + layout.generation_offset())); - common.set_generation(NextGeneration(old_generation)); - // We do not set control and slots in CommonFields yet to avoid overriding // SOO data. - ctrl_t* new_ctrl = reinterpret_cast(mem + layout.control_offset()); - void* new_slots = mem + layout.slot_offset(); + const auto [new_ctrl, new_slots] = + AllocBackingArray(common, policy, new_capacity, has_infoz, alloc); const size_t soo_slot_hash = policy.hash_slot(policy.hash_fn(common), common.soo_data()); @@ -1224,19 +1223,31 @@ size_t GrowToNextCapacityDispatch(CommonFields& common, } } -// Grows to next capacity and prepares insert for the given new_hash. -// Returns the offset of the new element. +void IncrementSmallSize(CommonFields& common, + const PolicyFunctions& __restrict policy) { + ABSL_SWISSTABLE_ASSERT(common.is_small()); + if (policy.soo_enabled) { + common.set_full_soo(); + } else { + common.increment_size(); + common.growth_info().OverwriteEmptyAsFull(); + SanitizerUnpoisonMemoryRegion(common.slot_array(), policy.slot_size); + } +} + +} // namespace + size_t GrowToNextCapacityAndPrepareInsert( CommonFields& common, const PolicyFunctions& __restrict policy, size_t new_hash) { ABSL_SWISSTABLE_ASSERT(common.growth_left() == 0); const size_t old_capacity = common.capacity(); - ABSL_SWISSTABLE_ASSERT(old_capacity == 0 || - old_capacity > policy.soo_capacity()); + ABSL_SWISSTABLE_ASSERT(old_capacity > policy.soo_capacity()); const size_t new_capacity = NextCapacity(old_capacity); ABSL_SWISSTABLE_ASSERT(IsValidCapacity(new_capacity)); ABSL_SWISSTABLE_ASSERT(new_capacity > policy.soo_capacity()); + ABSL_SWISSTABLE_ASSERT(!IsSmallCapacity(new_capacity)); ctrl_t* old_ctrl = common.control(); void* old_slots = common.slot_array(); @@ -1244,29 +1255,12 @@ size_t GrowToNextCapacityAndPrepareInsert( 
common.set_capacity(new_capacity); const size_t slot_size = policy.slot_size; const size_t slot_align = policy.slot_align; - HashtablezInfoHandle infoz; - if (old_capacity > 0) { - infoz = common.infoz(); - } else { - const bool should_sample = - policy.is_hashtablez_eligible && ShouldSampleNextTable(); - if (ABSL_PREDICT_FALSE(should_sample)) { - infoz = ForcedTrySample(slot_size, policy.key_size, policy.value_size, - policy.soo_capacity()); - } - } - const bool has_infoz = infoz.IsSampled(); - - RawHashSetLayout layout(new_capacity, slot_size, slot_align, has_infoz); void* alloc = policy.get_char_alloc(common); - char* mem = static_cast(policy.alloc(alloc, layout.alloc_size())); - const GenerationType old_generation = common.generation(); - common.set_generation_ptr( - reinterpret_cast(mem + layout.generation_offset())); - common.set_generation(NextGeneration(old_generation)); + HashtablezInfoHandle infoz = common.infoz(); + const bool has_infoz = infoz.IsSampled(); - ctrl_t* new_ctrl = reinterpret_cast(mem + layout.control_offset()); - void* new_slots = mem + layout.slot_offset(); + const auto [new_ctrl, new_slots] = + AllocBackingArray(common, policy, new_capacity, has_infoz, alloc); common.set_control(new_ctrl); common.set_slots(new_slots); SanitizerPoisonMemoryRegion(new_slots, new_capacity * slot_size); @@ -1274,41 +1268,43 @@ size_t GrowToNextCapacityAndPrepareInsert( h2_t new_h2 = H2(new_hash); size_t total_probe_length = 0; FindInfo find_info; - if (old_capacity == 0) { - static_assert(NextCapacity(0) == 1); - InitializeSingleElementControlBytes(new_h2, new_ctrl); - common.generate_new_seed(); - find_info = FindInfo{0, 0}; - SanitizerUnpoisonMemoryRegion(new_slots, slot_size); - } else { - if (ABSL_PREDICT_TRUE(is_single_group(new_capacity))) { + if (ABSL_PREDICT_TRUE(is_single_group(new_capacity))) { + size_t offset; + if (old_capacity == 1) { + size_t orig_hash = policy.hash_slot(policy.hash_fn(common), old_slots); + offset = 
Resize1To3NewOffset(new_hash, common.seed()); + InitializeThreeElementsControlBytes(H2(orig_hash), new_h2, offset, + new_ctrl); + void* target_slot = SlotAddress(new_slots, offset, slot_size); + SanitizerUnpoisonMemoryRegion(target_slot, slot_size); + } else { GrowIntoSingleGroupShuffleControlBytes(old_ctrl, old_capacity, new_ctrl, new_capacity); - // Single group tables have all slots full on resize. So we can transfer - // all slots without checking the control bytes. - ABSL_SWISSTABLE_ASSERT(common.size() == old_capacity); - auto* target = NextSlot(new_slots, slot_size); - SanitizerUnpoisonMemoryRegion(target, old_capacity * slot_size); - policy.transfer_n(&common, target, old_slots, old_capacity); // We put the new element either at the beginning or at the end of the // table with approximately equal probability. - size_t offset = SingleGroupTableH1(new_hash, common.seed()) & 1 - ? 0 - : new_capacity - 1; + offset = SingleGroupTableH1(new_hash, common.seed()) & 1 + ? 0 + : new_capacity - 1; ABSL_SWISSTABLE_ASSERT(IsEmpty(new_ctrl[offset])); SetCtrlInSingleGroupTable(common, offset, new_h2, policy.slot_size); - find_info = FindInfo{offset, 0}; - } else { - total_probe_length = - GrowToNextCapacityDispatch(common, policy, old_ctrl, old_slots); - find_info = find_first_non_full(common, new_hash); - SetCtrlInLargeTable(common, find_info.offset, new_h2, policy.slot_size); } - ABSL_SWISSTABLE_ASSERT(old_capacity > policy.soo_capacity()); - (*policy.dealloc)(alloc, old_capacity, old_ctrl, slot_size, slot_align, - has_infoz); + find_info = FindInfo{offset, 0}; + // Single group tables have all slots full on resize. So we can transfer + // all slots without checking the control bytes. 
+ ABSL_SWISSTABLE_ASSERT(common.size() == old_capacity); + void* target = NextSlot(new_slots, slot_size); + SanitizerUnpoisonMemoryRegion(target, old_capacity * slot_size); + policy.transfer_n(&common, target, old_slots, old_capacity); + } else { + total_probe_length = + GrowToNextCapacityDispatch(common, policy, old_ctrl, old_slots); + find_info = find_first_non_full(common, new_hash); + SetCtrlInLargeTable(common, find_info.offset, new_h2, policy.slot_size); } + ABSL_SWISSTABLE_ASSERT(old_capacity > policy.soo_capacity()); + (*policy.dealloc)(alloc, old_capacity, old_ctrl, slot_size, slot_align, + has_infoz); PrepareInsertCommon(common); ResetGrowthLeft(GetGrowthInfoFromControl(new_ctrl), new_capacity, common.size()); @@ -1323,6 +1319,55 @@ size_t GrowToNextCapacityAndPrepareInsert( return find_info.offset; } +void SmallEmptyNonSooPrepareInsert(CommonFields& common, + const PolicyFunctions& __restrict policy, + absl::FunctionRef get_hash) { + ABSL_SWISSTABLE_ASSERT(common.is_small()); + ABSL_SWISSTABLE_ASSERT(!policy.soo_enabled); + if (common.capacity() == 1) { + IncrementSmallSize(common, policy); + return; + } + + constexpr size_t kNewCapacity = 1; + + common.set_capacity(kNewCapacity); + HashtablezInfoHandle infoz; + const bool should_sample = + policy.is_hashtablez_eligible && ShouldSampleNextTable(); + if (ABSL_PREDICT_FALSE(should_sample)) { + infoz = ForcedTrySample(policy.slot_size, policy.key_size, + policy.value_size, policy.soo_capacity()); + } + const bool has_infoz = infoz.IsSampled(); + void* alloc = policy.get_char_alloc(common); + + // TODO(b/413062340): don't allocate control bytes for capacity 1 tables. We + // don't use the control bytes in this case. 
+ const auto [new_ctrl, new_slots] = + AllocBackingArray(common, policy, kNewCapacity, has_infoz, alloc); + common.set_control(new_ctrl); + common.set_slots(new_slots); + + static_assert(NextCapacity(0) == 1); + PrepareInsertCommon(common); + // TODO(b/413062340): maybe don't allocate growth info for capacity 1 tables. + // Doing so may require additional branches/complexity so it might not be + // worth it. + GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(0); + + if (ABSL_PREDICT_TRUE(!has_infoz)) return; + // TODO(b/413062340): we could potentially store infoz in place of the control + // pointer for the capacity 1 case. + common.set_has_infoz(); + infoz.RecordStorageChanged(/*size=*/0, kNewCapacity); + infoz.RecordRehash(/*total_probe_length=*/0); + infoz.RecordInsert(get_hash(), /*distance_from_desired=*/0); + common.set_infoz(infoz); +} + +namespace { + // Called whenever the table needs to vacate empty slots either by removing // tombstones via rehash or growth to next capacity. ABSL_ATTRIBUTE_NOINLINE @@ -1511,36 +1556,25 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert( ABSL_SWISSTABLE_ASSERT(common.size() == policy.soo_capacity()); static constexpr size_t kNewCapacity = NextCapacity(SooCapacity()); const size_t slot_size = policy.slot_size; - const size_t slot_align = policy.slot_align; + void* alloc = policy.get_char_alloc(common); common.set_capacity(kNewCapacity); // Since the table is not empty, it will not be sampled. // The decision to sample was already made during the first insertion. 
- RawHashSetLayout layout(kNewCapacity, slot_size, slot_align, - /*has_infoz=*/false); - void* alloc = policy.get_char_alloc(common); - char* mem = static_cast(policy.alloc(alloc, layout.alloc_size())); - const GenerationType old_generation = common.generation(); - common.set_generation_ptr( - reinterpret_cast(mem + layout.generation_offset())); - common.set_generation(NextGeneration(old_generation)); - + // // We do not set control and slots in CommonFields yet to avoid overriding // SOO data. - ctrl_t* new_ctrl = reinterpret_cast(mem + layout.control_offset()); - void* new_slots = mem + layout.slot_offset(); + const auto [new_ctrl, new_slots] = AllocBackingArray( + common, policy, kNewCapacity, /*has_infoz=*/false, alloc); PrepareInsertCommon(common); ABSL_SWISSTABLE_ASSERT(common.size() == 2); GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(kNewCapacity - 2); common.generate_new_seed(); - // After resize from capacity 1 to 3, we always have exactly the slot with - // index 1 occupied, so we need to insert either at index 0 or index 2. 
- static_assert(SooSlotIndex() == 1); - const size_t offset = SingleGroupTableH1(new_hash, common.seed()) & 2; - InitializeThreeElementsControlBytesAfterSoo(soo_slot_ctrl, new_hash, offset, - new_ctrl); + const size_t offset = Resize1To3NewOffset(new_hash, common.seed()); + InitializeThreeElementsControlBytes(static_cast(soo_slot_ctrl), + H2(new_hash), offset, new_ctrl); SanitizerPoisonMemoryRegion(new_slots, slot_size * kNewCapacity); void* target_slot = SlotAddress(new_slots, SooSlotIndex(), slot_size); @@ -1612,7 +1646,7 @@ void Rehash(CommonFields& common, const PolicyFunctions& __restrict policy, } ABSL_SWISSTABLE_ASSERT(slot_size <= sizeof(HeapOrSoo)); ABSL_SWISSTABLE_ASSERT(policy.slot_align <= alignof(HeapOrSoo)); - HeapOrSoo tmp_slot(uninitialized_tag_t{}); + HeapOrSoo tmp_slot; size_t begin_offset = FindFirstFullSlot(0, cap, common.control()); policy.transfer_n( &common, &tmp_slot, @@ -1655,19 +1689,22 @@ void Copy(CommonFields& common, const PolicyFunctions& __restrict policy, ABSL_SWISSTABLE_ASSERT(size > 0); const size_t soo_capacity = policy.soo_capacity(); const size_t slot_size = policy.slot_size; - if (size <= soo_capacity) { - ABSL_SWISSTABLE_ASSERT(size == 1); - common.set_full_soo(); + const bool soo_enabled = policy.soo_enabled; + if (size == 1) { + if (!soo_enabled) ReserveTableToFitNewSize(common, policy, 1); + IncrementSmallSize(common, policy); + const size_t other_capacity = other.capacity(); const void* other_slot = - other.capacity() <= soo_capacity - ? other.soo_data() - : SlotAddress( - other.slot_array(), - FindFirstFullSlot(0, other.capacity(), other.control()), - slot_size); - copy_fn(common.soo_data(), other_slot); - - if (policy.is_hashtablez_eligible && ShouldSampleNextTable()) { + other_capacity <= soo_capacity ? other.soo_data() + : other.is_small() + ? other.slot_array() + : SlotAddress(other.slot_array(), + FindFirstFullSlot(0, other_capacity, other.control()), + slot_size); + copy_fn(soo_enabled ? 
common.soo_data() : common.slot_array(), other_slot); + + if (soo_enabled && policy.is_hashtablez_eligible && + ShouldSampleNextTable()) { GrowFullSooTableToNextCapacityForceSampling(common, policy); } return; diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 85583742ac9..f5fdf66a22f 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -150,11 +150,11 @@ // To `insert`, we compose `unchecked_insert` with `find`. We compute `h(x)` and // perform a `find` to see if it's already present; if it is, we're done. If // it's not, we may decide the table is getting overcrowded (i.e. the load -// factor is greater than 7/8 for big tables; `is_small()` tables use a max load -// factor of 1); in this case, we allocate a bigger array, `unchecked_insert` -// each element of the table into the new array (we know that no insertion here -// will insert an already-present value), and discard the old backing array. At -// this point, we may `unchecked_insert` the value `x`. +// factor is greater than 7/8 for big tables; tables smaller than one probing +// group use a max load factor of 1); in this case, we allocate a bigger array, +// `unchecked_insert` each element of the table into the new array (we know that +// no insertion here will insert an already-present value), and discard the old +// backing array. At this point, we may `unchecked_insert` the value `x`. // // Below, `unchecked_insert` is partly implemented by `prepare_insert`, which // presents a viable, initialized slot pointee to the caller. @@ -381,6 +381,8 @@ constexpr bool IsNoThrowSwappable(std::false_type /* is_swappable */) { } // See definition comment for why this is size 32. +// TODO(b/413062340): we can probably reduce this to 16 now that it's only used +// for default-constructed iterators. 
ABSL_DLL extern const ctrl_t kEmptyGroup[32]; // We use these sentinel capacity values in debug mode to indicate different @@ -395,10 +397,11 @@ enum InvalidCapacity : size_t { kSelfMovedFrom, }; -// Returns a pointer to a control byte group that can be used by empty tables. +// Returns a pointer to a control byte group that can be used by +// default-constructed iterators. inline ctrl_t* EmptyGroup() { // Const must be cast away here; no uses of this function will actually write - // to it because it is only used for empty tables. + // to it because it is only used for default-constructed iterators. return const_cast(kEmptyGroup + 16); } @@ -781,6 +784,9 @@ static_assert(alignof(GrowthInfo) == alignof(size_t), ""); // A valid capacity is a non-zero integer `2^m - 1`. constexpr bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } +// Whether a table is small enough that we don't need to hash any keys. +constexpr bool IsSmallCapacity(size_t capacity) { return capacity <= 1; } + // Returns the number of "cloned control bytes". // // This is the number of control bytes that are present both at the beginning @@ -865,31 +871,28 @@ struct HashtableFreeFunctionsAccess; // This allows us to work around an uninitialized memory warning when // constructing begin() iterators in empty hashtables. +template union MaybeInitializedPtr { - void* get() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(p); } - void set(void* ptr) { p = ptr; } + T* get() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(p); } + void set(T* ptr) { p = ptr; } - void* p; + T* p; }; struct HeapPtrs { - explicit HeapPtrs(uninitialized_tag_t) {} - explicit HeapPtrs(ctrl_t* c) : control(c) {} - // The control bytes (and, also, a pointer near to the base of the backing // array). // - // This contains `capacity + 1 + NumClonedBytes()` entries, even - // when the table is empty (hence EmptyGroup). + // This contains `capacity + 1 + NumClonedBytes()` entries. 
// // Note that growth_info is stored immediately before this pointer. - // May be uninitialized for SOO tables. - ctrl_t* control; + // May be uninitialized for small tables. + MaybeInitializedPtr control; // The beginning of the slots, located at `SlotOffset()` bytes after // `control`. May be uninitialized for empty tables. // Note: we can't use `slots` because Qt defines "slots" as a macro. - MaybeInitializedPtr slot_array; + MaybeInitializedPtr slot_array; }; // Returns the maximum size of the SOO slot. @@ -898,19 +901,16 @@ constexpr size_t MaxSooSlotSize() { return sizeof(HeapPtrs); } // Manages the backing array pointers or the SOO slot. When raw_hash_set::is_soo // is true, the SOO slot is stored in `soo_data`. Otherwise, we use `heap`. union HeapOrSoo { - explicit HeapOrSoo(uninitialized_tag_t) : heap(uninitialized_tag_t{}) {} - explicit HeapOrSoo(ctrl_t* c) : heap(c) {} - - ctrl_t*& control() { + MaybeInitializedPtr& control() { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control); } - ctrl_t* control() const { + MaybeInitializedPtr control() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control); } - MaybeInitializedPtr& slot_array() { + MaybeInitializedPtr& slot_array() { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array); } - MaybeInitializedPtr slot_array() const { + MaybeInitializedPtr slot_array() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array); } void* get_soo_data() { @@ -939,20 +939,13 @@ inline GrowthInfo& GetGrowthInfoFromControl(ctrl_t* control) { class CommonFields : public CommonFieldsGenerationInfo { public: explicit CommonFields(soo_tag_t) - : capacity_(SooCapacity()), - size_(no_seed_empty_tag_t{}), - heap_or_soo_(uninitialized_tag_t{}) {} + : capacity_(SooCapacity()), size_(no_seed_empty_tag_t{}) {} explicit CommonFields(full_soo_tag_t) - : capacity_(SooCapacity()), - size_(full_soo_tag_t{}), - heap_or_soo_(uninitialized_tag_t{}) {} + : capacity_(SooCapacity()), size_(full_soo_tag_t{}) {} 
explicit CommonFields(non_soo_tag_t) - : capacity_(0), - size_(no_seed_empty_tag_t{}), - heap_or_soo_(EmptyGroup()) {} + : capacity_(0), size_(no_seed_empty_tag_t{}) {} // For use in swapping. - explicit CommonFields(uninitialized_tag_t) - : size_(uninitialized_tag_t{}), heap_or_soo_(uninitialized_tag_t{}) {} + explicit CommonFields(uninitialized_tag_t) : size_(uninitialized_tag_t{}) {} // Not copyable CommonFields(const CommonFields&) = delete; @@ -979,7 +972,9 @@ class CommonFields : public CommonFieldsGenerationInfo { const void* soo_data() const { return heap_or_soo_.get_soo_data(); } void* soo_data() { return heap_or_soo_.get_soo_data(); } - ctrl_t* control() const { return heap_or_soo_.control(); } + ctrl_t* control() const { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap_or_soo_.control().get()); + } // When we set the control bytes, we also often want to generate a new seed. // So we bundle these two operations together to make sure we don't forget to @@ -989,7 +984,7 @@ class CommonFields : public CommonFieldsGenerationInfo { // being changed. In such cases, we will need to rehash the table. template void set_control(ctrl_t* c) { - heap_or_soo_.control() = c; + heap_or_soo_.control().set(c); if constexpr (kGenerateSeed) { generate_new_seed(); } @@ -1003,7 +998,9 @@ class CommonFields : public CommonFieldsGenerationInfo { // Note: we can't use slots() because Qt defines "slots" as a macro. void* slot_array() const { return heap_or_soo_.slot_array().get(); } - MaybeInitializedPtr slots_union() const { return heap_or_soo_.slot_array(); } + MaybeInitializedPtr slots_union() const { + return heap_or_soo_.slot_array(); + } void set_slots(void* s) { heap_or_soo_.slot_array().set(s); } // The number of filled slots. 
@@ -1049,6 +1046,7 @@ class CommonFields : public CommonFieldsGenerationInfo { c > kAboveMaxValidCapacity); capacity_ = c; } + bool is_small() const { return IsSmallCapacity(capacity_); } // The number of slots we can still fill without needing to rehash. // This is stored in the heap allocation before the control bytes. @@ -1824,6 +1822,17 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, void GrowFullSooTableToNextCapacityForceSampling(CommonFields& common, const PolicyFunctions& policy); +// Grows to next capacity and prepares insert for the given new_hash. +// Returns the offset of the new element. +size_t GrowToNextCapacityAndPrepareInsert(CommonFields& common, + const PolicyFunctions& policy, + size_t new_hash); +// When growing from capacity 0 to 1, we only need the hash if the table ends up +// being sampled so don't compute it unless needed. +void SmallEmptyNonSooPrepareInsert(CommonFields& common, + const PolicyFunctions& policy, + absl::FunctionRef get_hash); + // Resizes table with allocated slots and change the table seed. // Tables with SOO enabled must have capacity > policy.soo_capacity. // No sampling will be performed since table is already allocated. @@ -1952,6 +1961,8 @@ class raw_hash_set { bool is_soo() const { return fits_in_soo(capacity()); } bool is_full_soo() const { return is_soo() && !empty(); } + bool is_small() const { return common().is_small(); } + // Give an early error when key_type is not hashable/eq. auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); @@ -2072,7 +2083,7 @@ class raw_hash_set { // This constructor is used in begin() to avoid an MSan // use-of-uninitialized-value error. Delegating from this constructor to // the previous one doesn't avoid the error. 
- iterator(ctrl_t* ctrl, MaybeInitializedPtr slot, + iterator(ctrl_t* ctrl, MaybeInitializedPtr slot, const GenerationType* generation_ptr) : HashSetIteratorGenerationInfo(generation_ptr), ctrl_(ctrl), @@ -2369,7 +2380,7 @@ class raw_hash_set { iterator begin() ABSL_ATTRIBUTE_LIFETIME_BOUND { if (ABSL_PREDICT_FALSE(empty())) return end(); - if (capacity() == 1) return single_iterator(); + if (is_small()) return single_iterator(); iterator it = {control(), common().slots_union(), common().generation_ptr()}; it.skip_empty_or_deleted(); @@ -2423,9 +2434,11 @@ class raw_hash_set { const size_t cap = capacity(); if (cap == 0) { // Already guaranteed to be empty; so nothing to do. - } else if (is_soo()) { - if (!empty()) destroy(soo_slot()); - common().set_empty_soo(); + } else if (is_small()) { + if (!empty()) { + destroy(single_slot()); + decrement_small_size(); + } } else { destroy_slots(); clear_backing_array(/*reuse=*/cap < 128); @@ -2701,14 +2714,11 @@ class raw_hash_set { // This overload is necessary because otherwise erase(const K&) would be // a better match if non-const iterator is passed as an argument. void erase(iterator it) { + ABSL_SWISSTABLE_ASSERT(capacity() > 0); AssertNotDebugCapacity(); AssertIsFull(it.control(), it.generation(), it.generation_ptr(), "erase()"); destroy(it.slot()); - if (is_soo()) { - common().set_empty_soo(); - } else { - erase_meta_only(it); - } + erase_meta_only(it); } iterator erase(const_iterator first, @@ -2718,9 +2728,9 @@ class raw_hash_set { // capacity() > 0 as a precondition. 
if (empty()) return end(); if (first == last) return last.inner_; - if (is_soo()) { - destroy(soo_slot()); - common().set_empty_soo(); + if (is_small()) { + destroy(single_slot()); + erase_meta_only(single_iterator()); return end(); } if (first == begin() && last == end()) { @@ -2752,9 +2762,10 @@ class raw_hash_set { .second; }; - if (src.is_soo()) { + if (src.is_small()) { if (src.empty()) return; - if (insert_slot(src.soo_slot())) src.common().set_empty_soo(); + if (insert_slot(src.single_slot())) + src.erase_meta_only(src.single_iterator()); return; } for (auto it = src.begin(), e = src.end(); it != e;) { @@ -2775,11 +2786,7 @@ class raw_hash_set { position.inner_.generation_ptr(), "extract()"); allocator_type alloc(char_alloc_ref()); auto node = CommonAccess::Transfer(alloc, position.slot()); - if (is_soo()) { - common().set_empty_soo(); - } else { - erase_meta_only(position); - } + erase_meta_only(position); return node; } @@ -2855,7 +2862,7 @@ class raw_hash_set { template iterator find(const key_arg& key) ABSL_ATTRIBUTE_LIFETIME_BOUND { AssertOnFind(key); - if (capacity() <= 1) return find_small(key); + if (is_small()) return find_small(key); prefetch_heap_block(); return find_large(key, hash_of(key)); } @@ -3035,18 +3042,16 @@ class raw_hash_set { // SOO functionality. template iterator find_small(const key_arg& key) { - ABSL_SWISSTABLE_ASSERT(capacity() <= 1); - return empty() || !PolicyTraits::apply( - EqualElement{key, eq_ref()}, - PolicyTraits::element(single_slot())) + ABSL_SWISSTABLE_ASSERT(is_small()); + return empty() || !PolicyTraits::apply(EqualElement{key, eq_ref()}, + PolicyTraits::element(single_slot())) ? 
end() : single_iterator(); } template iterator find_large(const key_arg& key, size_t hash) { - ABSL_SWISSTABLE_ASSERT(capacity() > 1); - ABSL_SWISSTABLE_ASSERT(!is_soo()); + ABSL_SWISSTABLE_ASSERT(!is_small()); auto seq = probe(common(), hash); const h2_t h2 = H2(hash); const ctrl_t* ctrl = control(); @@ -3083,7 +3088,7 @@ class raw_hash_set { } void destroy_slots() { - ABSL_SWISSTABLE_ASSERT(!is_soo()); + ABSL_SWISSTABLE_ASSERT(!is_small()); if (PolicyTraits::template destroy_is_trivial()) return; auto destroy_slot = [&](const ctrl_t*, void* slot) { this->destroy(static_cast(slot)); @@ -3115,13 +3120,14 @@ class raw_hash_set { return; } if (capacity() == 0) return; - if (is_soo()) { + if (is_small()) { if (!empty()) { - ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(destroy(soo_slot())); + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(destroy(single_slot())); } - return; + if constexpr (SooEnabled()) return; + } else { + destroy_slots(); } - destroy_slots(); dealloc(); } @@ -3130,7 +3136,10 @@ class raw_hash_set { // This merely updates the pertinent control byte. This can be used in // conjunction with Policy::transfer to move the object to another place. 
void erase_meta_only(const_iterator it) { - ABSL_SWISSTABLE_ASSERT(!is_soo()); + if (is_soo()) { + common().set_empty_soo(); + return; + } EraseMetaOnly(common(), static_cast(it.control() - control()), sizeof(slot_type)); } @@ -3253,31 +3262,46 @@ class raw_hash_set { } template - std::pair find_or_prepare_insert_soo(const K& key) { - ctrl_t soo_slot_ctrl; + std::pair find_or_prepare_insert_small(const K& key) { + ABSL_SWISSTABLE_ASSERT(is_small()); + [[maybe_unused]] ctrl_t soo_slot_ctrl; if (empty()) { + if (!SooEnabled()) { + SmallEmptyNonSooPrepareInsert(common(), GetPolicyFunctions(), + [&] { return hash_of(key); }); + return {single_iterator(), true}; + } if (!should_sample_soo()) { common().set_full_soo(); - return {soo_iterator(), true}; + return {single_iterator(), true}; } soo_slot_ctrl = ctrl_t::kEmpty; } else if (PolicyTraits::apply(EqualElement{key, eq_ref()}, - PolicyTraits::element(soo_slot()))) { - return {soo_iterator(), false}; + PolicyTraits::element(single_slot()))) { + return {single_iterator(), false}; + } else if constexpr (SooEnabled()) { + soo_slot_ctrl = static_cast(H2(hash_of(single_slot()))); + } + ABSL_SWISSTABLE_ASSERT(capacity() == 1); + const size_t hash = hash_of(key); + size_t index; + if constexpr (SooEnabled()) { + constexpr bool kUseMemcpy = + PolicyTraits::transfer_uses_memcpy() && SooEnabled(); + index = GrowSooTableToNextCapacityAndPrepareInsert< + kUseMemcpy ? OptimalMemcpySizeForSooSlotTransfer(sizeof(slot_type)) + : 0, + kUseMemcpy>(common(), GetPolicyFunctions(), hash, soo_slot_ctrl); } else { - soo_slot_ctrl = static_cast(H2(hash_of(soo_slot()))); - } - constexpr bool kUseMemcpy = - PolicyTraits::transfer_uses_memcpy() && SooEnabled(); - size_t index = GrowSooTableToNextCapacityAndPrepareInsert< - kUseMemcpy ? OptimalMemcpySizeForSooSlotTransfer(sizeof(slot_type)) : 0, - kUseMemcpy>(common(), GetPolicyFunctions(), hash_of(key), - soo_slot_ctrl); + // TODO(b/413062340): add specialized function for growing from 1 to 3. 
+ index = GrowToNextCapacityAndPrepareInsert(common(), GetPolicyFunctions(), + hash); + } return {iterator_at(index), true}; } template - std::pair find_or_prepare_insert_non_soo(const K& key) { + std::pair find_or_prepare_insert_large(const K& key) { ABSL_SWISSTABLE_ASSERT(!is_soo()); prefetch_heap_block(); const size_t hash = hash_of(key); @@ -3382,8 +3406,8 @@ class raw_hash_set { "hash/eq functors are inconsistent."); }; - if (is_soo()) { - assert_consistent(/*unused*/ nullptr, soo_slot()); + if (is_small()) { + assert_consistent(/*unused*/ nullptr, single_slot()); return; } // We only do validation for small tables so that it's constant time. @@ -3397,8 +3421,8 @@ class raw_hash_set { template std::pair find_or_prepare_insert(const K& key) { AssertOnFind(key); - if (is_soo()) return find_or_prepare_insert_soo(key); - return find_or_prepare_insert_non_soo(key); + if (is_small()) return find_or_prepare_insert_small(key); + return find_or_prepare_insert_large(key); } // Constructs the value in the space pointed by the iterator. This only works @@ -3413,9 +3437,9 @@ class raw_hash_set { void emplace_at(iterator iter, Args&&... args) { construct(iter.slot(), std::forward(args)...); - // When capacity is 1, find calls find_small and if size is 0, then it will + // When is_small, find calls find_small and if size is 0, then it will // return an end iterator. This can happen in the raw_hash_set copy ctor. 
- assert((capacity() == 1 || + assert((is_small() || PolicyTraits::apply(FindElement{*this}, *iter) == iter) && "constructed value does not match the lookup key"); } @@ -3486,22 +3510,23 @@ class raw_hash_set { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN( const_cast(this)->soo_slot()); } - iterator soo_iterator() { - return {SooControl(), soo_slot(), common().generation_ptr()}; - } - const_iterator soo_iterator() const { - return const_cast(this)->soo_iterator(); - } slot_type* single_slot() { - ABSL_SWISSTABLE_ASSERT(capacity() <= 1); + ABSL_SWISSTABLE_ASSERT(is_small()); return SooEnabled() ? soo_slot() : slot_array(); } const slot_type* single_slot() const { return const_cast(this)->single_slot(); } + void decrement_small_size() { + ABSL_SWISSTABLE_ASSERT(is_small()); + SooEnabled() ? common().set_empty_soo() : common().decrement_size(); + if (!SooEnabled()) { + SanitizerPoisonObject(single_slot()); + growth_info().OverwriteFullAsEmpty(); + } + } iterator single_iterator() { - return {SooEnabled() ? 
SooControl() : control(), single_slot(), - common().generation_ptr()}; + return {SooControl(), single_slot(), common().generation_ptr()}; } const_iterator single_iterator() const { return const_cast(this)->single_iterator(); @@ -3646,15 +3671,15 @@ struct HashtableFreeFunctionsAccess { if (c->empty()) { return 0; } - if (c->is_soo()) { - auto it = c->soo_iterator(); + if (c->is_small()) { + auto it = c->single_iterator(); if (!pred(*it)) { ABSL_SWISSTABLE_ASSERT(c->size() == 1 && "hash table was modified unexpectedly"); return 0; } c->destroy(it.slot()); - c->common().set_empty_soo(); + c->erase_meta_only(it); return 1; } ABSL_ATTRIBUTE_UNUSED const size_t original_size_for_assert = c->size(); @@ -3684,8 +3709,8 @@ struct HashtableFreeFunctionsAccess { if (c->empty()) { return; } - if (c->is_soo()) { - cb(*c->soo_iterator()); + if (c->is_small()) { + cb(*c->single_iterator()); return; } using SlotType = typename Set::slot_type; diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 9a323c41bec..a5cbd44d3b2 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -1283,6 +1283,9 @@ TYPED_TEST(SooTest, Contains2) { t.clear(); EXPECT_FALSE(t.contains(0)); + + EXPECT_TRUE(t.insert(0).second); + EXPECT_TRUE(t.contains(0)); } int decompose_constructed; @@ -2083,8 +2086,6 @@ TEST(Table, EraseInsertProbing) { TEST(Table, GrowthInfoDeletedBit) { BadTable t; - EXPECT_TRUE( - RawHashSetTestOnlyAccess::GetCommon(t).growth_info().HasNoDeleted()); int64_t init_count = static_cast( CapacityToGrowth(NormalizeCapacity(Group::kWidth + 1))); for (int64_t i = 0; i < init_count; ++i) { @@ -2604,6 +2605,19 @@ TEST(Table, Merge) { EXPECT_THAT(t2, UnorderedElementsAre(Pair("0", "~0"))); } +TEST(Table, MergeSmall) { + StringTable t1, t2; + t1.emplace("1", "1"); + t2.emplace("2", "2"); + + EXPECT_THAT(t1, UnorderedElementsAre(Pair("1", "1"))); + EXPECT_THAT(t2, 
UnorderedElementsAre(Pair("2", "2"))); + + t2.merge(t1); + EXPECT_EQ(t1.size(), 0); + EXPECT_THAT(t2, UnorderedElementsAre(Pair("1", "1"), Pair("2", "2"))); +} + TEST(Table, IteratorEmplaceConstructibleRequirement) { struct Value { explicit Value(absl::string_view view) : value(view) {} @@ -2690,6 +2704,24 @@ TEST(Nodes, ExtractInsert) { EXPECT_FALSE(node); // NOLINT(bugprone-use-after-move) } +TEST(Nodes, ExtractInsertSmall) { + constexpr char k0[] = "Very long string zero."; + StringTable t = {{k0, ""}}; + EXPECT_THAT(t, UnorderedElementsAre(Pair(k0, ""))); + + auto node = t.extract(k0); + EXPECT_EQ(t.size(), 0); + EXPECT_TRUE(node); + EXPECT_FALSE(node.empty()); + + StringTable t2; + StringTable::insert_return_type res = t2.insert(std::move(node)); + EXPECT_TRUE(res.inserted); + EXPECT_THAT(*res.position, Pair(k0, "")); + EXPECT_FALSE(res.node); + EXPECT_THAT(t2, UnorderedElementsAre(Pair(k0, ""))); +} + TYPED_TEST(SooTest, HintInsert) { TypeParam t = {1, 2, 3}; auto node = t.extract(1); @@ -2828,12 +2860,12 @@ TEST(TableDeathTest, InvalidIteratorAsserts) { NonSooIntTable t; // Extra simple "regexp" as regexp support is highly varied across platforms. - EXPECT_DEATH_IF_SUPPORTED(t.erase(t.end()), - "erase.* called on end.. iterator."); + EXPECT_DEATH_IF_SUPPORTED(++t.end(), "operator.* called on end.. 
iterator."); typename NonSooIntTable::iterator iter; EXPECT_DEATH_IF_SUPPORTED( ++iter, "operator.* called on default-constructed iterator."); t.insert(0); + t.insert(1); iter = t.begin(); t.erase(iter); const char* const kErasedDeathMessage = @@ -3644,11 +3676,13 @@ TEST(Iterator, InvalidComparisonDifferentTables) { EXPECT_DEATH_IF_SUPPORTED(void(t1.end() == default_constructed_iter), "Invalid iterator comparison.*default-constructed"); t1.insert(0); + t1.insert(1); EXPECT_DEATH_IF_SUPPORTED(void(t1.begin() == t2.end()), "Invalid iterator comparison.*empty hashtable"); EXPECT_DEATH_IF_SUPPORTED(void(t1.begin() == default_constructed_iter), "Invalid iterator comparison.*default-constructed"); t2.insert(0); + t2.insert(1); EXPECT_DEATH_IF_SUPPORTED(void(t1.begin() == t2.end()), "Invalid iterator comparison.*end.. iterator"); EXPECT_DEATH_IF_SUPPORTED(void(t1.begin() == t2.begin()), @@ -3687,40 +3721,47 @@ TEST(Table, CountedHash) { GTEST_SKIP() << "Only run under NDEBUG: `assert` statements may cause " "redundant hashing."; } + // When the table is sampled, we need to hash on the first insertion. 
+ DisableSampling(); using Table = CountedHashIntTable; auto HashCount = [](const Table& t) { return t.hash_function().count; }; { Table t; + t.find(0); EXPECT_EQ(HashCount(t), 0); } { Table t; t.insert(1); - EXPECT_EQ(HashCount(t), 1); + t.find(1); + EXPECT_EQ(HashCount(t), 0); t.erase(1); - EXPECT_LE(HashCount(t), 2); + EXPECT_EQ(HashCount(t), 0); + t.insert(1); + t.insert(2); + EXPECT_EQ(HashCount(t), 2); } { Table t; t.insert(3); - EXPECT_EQ(HashCount(t), 1); + EXPECT_EQ(HashCount(t), 0); auto node = t.extract(3); - EXPECT_LE(HashCount(t), 2); + EXPECT_EQ(HashCount(t), 0); t.insert(std::move(node)); - EXPECT_LE(HashCount(t), 3); + EXPECT_EQ(HashCount(t), 0); } { Table t; t.emplace(5); - EXPECT_EQ(HashCount(t), 1); + EXPECT_EQ(HashCount(t), 0); } { Table src; src.insert(7); Table dst; dst.merge(src); - EXPECT_EQ(HashCount(dst), 1); + EXPECT_EQ(HashCount(dst), 0); } } @@ -3731,9 +3772,7 @@ TEST(Table, IterateOverFullSlotsEmpty) { auto fail_if_any = [](const ctrl_t*, void* i) { FAIL() << "expected no slots " << **static_cast(i); }; - container_internal::IterateOverFullSlots( - RawHashSetTestOnlyAccess::GetCommon(t), sizeof(SlotType), fail_if_any); - for (size_t i = 0; i < 256; ++i) { + for (size_t i = 2; i < 256; ++i) { t.reserve(i); container_internal::IterateOverFullSlots( RawHashSetTestOnlyAccess::GetCommon(t), sizeof(SlotType), fail_if_any); @@ -3745,7 +3784,9 @@ TEST(Table, IterateOverFullSlotsFull) { using SlotType = NonSooIntTableSlotType; std::vector expected_slots; - for (int64_t idx = 0; idx < 128; ++idx) { + t.insert(0); + expected_slots.push_back(0); + for (int64_t idx = 1; idx < 128; ++idx) { t.insert(idx); expected_slots.push_back(idx); From 472365f85636ab757e4668991dfac00c042b5008 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Wed, 14 May 2025 06:30:50 -0700 Subject: [PATCH 029/107] Remove the `salt` parameter from low level hash and use a global constant. That may potentially remove some loads. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also remove `LowLevelHashImpl` since it is adding an indirection to save the passing Seed() argument that is likely already in the register. ``` name old CYCLES/op new CYCLES/op delta BM_latency_AbslHash_Int32 16.1 ± 2% 16.0 ± 3% ~ (p=0.249 n=49+55) BM_latency_AbslHash_Int64 16.5 ± 3% 16.5 ± 3% ~ (p=0.862 n=51+50) BM_latency_AbslHash_String3 22.6 ± 0% 22.6 ± 1% +0.11% (p=0.012 n=54+55) BM_latency_AbslHash_String5 22.9 ±10% 22.9 ± 8% ~ (p=0.566 n=57+56) BM_latency_AbslHash_String9 22.9 ±14% 23.2 ±13% ~ (p=0.640 n=56+57) BM_latency_AbslHash_String17 21.9 ±10% 21.9 ± 7% ~ (p=0.409 n=52+55) BM_latency_AbslHash_String33 23.6 ± 4% 23.4 ± 5% ~ (p=0.098 n=53+55) BM_latency_AbslHash_String65 34.0 ±11% 32.5 ± 8% -4.41% (p=0.000 n=56+56) BM_latency_AbslHash_String257 53.0 ± 7% 52.3 ± 8% -1.31% (p=0.037 n=53+50) ``` PiperOrigin-RevId: 758656004 Change-Id: I9e828cde7d181da813aa8228b73d208ba7dc9042 --- absl/hash/internal/hash.cc | 7 +-- absl/hash/internal/hash.h | 24 +++----- absl/hash/internal/low_level_hash.cc | 27 +++++---- absl/hash/internal/low_level_hash.h | 11 +++- absl/hash/internal/low_level_hash_test.cc | 74 +++++++++++------------ 5 files changed, 67 insertions(+), 76 deletions(-) diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index 9abace5e2b7..d1765fb9a78 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -20,7 +20,7 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" -#include "absl/hash/internal/low_level_hash.h" +#include "absl/hash/internal/city.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -55,11 +55,6 @@ uint64_t MixingHashState::CombineLargeContiguousImpl64( ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed; -uint64_t MixingHashState::LowLevelHashImpl(const unsigned char* data, - size_t len) { - return LowLevelHashLenGt32(data, len, Seed(), &kStaticRandomData[0]); -} - } // namespace 
hash_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 7c90ab4dfef..dfc9a6f73ff 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -80,6 +80,7 @@ #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/hash/internal/city.h" +#include "absl/hash/internal/low_level_hash.h" #include "absl/hash/internal/weakly_mixed_integer.h" #include "absl/meta/type_traits.h" #include "absl/numeric/bits.h" @@ -1074,13 +1075,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { using uint128 = absl::uint128; #endif // ABSL_HAVE_INTRINSIC_INT128 - // Random data taken from the hexadecimal digits of Pi's fractional component. - // https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number - ABSL_CACHELINE_ALIGNED static constexpr uint64_t kStaticRandomData[] = { - 0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0, - 0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, - }; - static constexpr uint64_t kMul = uint64_t{0xdcb22ca68cb134ed}; @@ -1329,16 +1323,13 @@ class ABSL_DLL MixingHashState : public HashStateBase { return absl::gbswap_64(n * kMul); } - // An extern to avoid bloat on a direct call to LowLevelHash() with fixed - // values for both the seed and salt parameters. 
- static uint64_t LowLevelHashImpl(const unsigned char* data, size_t len); - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data, size_t len) { #ifdef ABSL_HAVE_INTRINSIC_INT128 - return LowLevelHashImpl(data, len); + return LowLevelHashLenGt32(data, len, Seed()); #else - return hash_internal::CityHash64(reinterpret_cast(data), len); + return hash_internal::CityHash64WithSeed( + reinterpret_cast(data), len, Seed()); #endif } @@ -1378,12 +1369,13 @@ class ABSL_DLL MixingHashState : public HashStateBase { inline uint64_t MixingHashState::CombineContiguousImpl( uint64_t state, const unsigned char* first, size_t len, std::integral_constant /* sizeof_size_t */) { - // For large values we use CityHash, for small ones we just use a - // multiplicative hash. + // For large values we use CityHash, for small ones we use custom low latency + // hash. if (len <= 8) { return CombineSmallContiguousImpl(state, first, len); } if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { + // TODO(b/417141985): expose and use CityHash32WithSeed. return Mix(state ^ hash_internal::CityHash32( reinterpret_cast(first), len), kMul); @@ -1396,7 +1388,7 @@ inline uint64_t MixingHashState::CombineContiguousImpl( uint64_t state, const unsigned char* first, size_t len, std::integral_constant /* sizeof_size_t */) { // For large values we use LowLevelHash or CityHash depending on the platform, - // for small ones we just use a multiplicative hash. + // for small ones we use custom low latency hash. 
if (len <= 8) { return CombineSmallContiguousImpl(state, first, len); } diff --git a/absl/hash/internal/low_level_hash.cc b/absl/hash/internal/low_level_hash.cc index 1a107ec674c..575cf745c5e 100644 --- a/absl/hash/internal/low_level_hash.cc +++ b/absl/hash/internal/low_level_hash.cc @@ -28,29 +28,30 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace hash_internal { namespace { + uint64_t Mix(uint64_t v0, uint64_t v1) { absl::uint128 p = v0; p *= v1; return absl::Uint128Low64(p) ^ absl::Uint128High64(p); } -uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state, - const uint64_t salt[5]) { + +uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) { uint64_t a = absl::base_internal::UnalignedLoad64(ptr); uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); - uint64_t cs0 = Mix(a ^ salt[1], b ^ current_state); - uint64_t cs1 = Mix(c ^ salt[2], d ^ current_state); + uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state); + uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state); return cs0 ^ cs1; } + } // namespace -uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed, - const uint64_t salt[5]) { +uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed) { assert(len > 32); const uint8_t* ptr = static_cast(data); - uint64_t current_state = seed ^ salt[0] ^ len; + uint64_t current_state = seed ^ kStaticRandomData[0] ^ len; const uint8_t* last_32_ptr = ptr + len - 32; if (len > 64) { @@ -74,11 +75,11 @@ uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed, uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48); uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56); - current_state = Mix(a ^ salt[1], b ^ current_state); - duplicated_state0 = Mix(c ^ salt[2], d ^ duplicated_state0); + current_state = Mix(a ^ kStaticRandomData[1], b ^ 
current_state); + duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0); - duplicated_state1 = Mix(e ^ salt[3], f ^ duplicated_state1); - duplicated_state2 = Mix(g ^ salt[4], h ^ duplicated_state2); + duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1); + duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2); ptr += 64; len -= 64; @@ -91,13 +92,13 @@ uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed, // We now have a data `ptr` with at most 64 bytes and the current state // of the hashing state machine stored in current_state. if (len > 32) { - current_state = Mix32Bytes(ptr, current_state, salt); + current_state = Mix32Bytes(ptr, current_state); } // We now have a data `ptr` with at most 32 bytes and the current state // of the hashing state machine stored in current_state. But we can // safely read from `ptr + len - 32`. - return Mix32Bytes(last_32_ptr, current_state, salt); + return Mix32Bytes(last_32_ptr, current_state); } } // namespace hash_internal diff --git a/absl/hash/internal/low_level_hash.h b/absl/hash/internal/low_level_hash.h index 49e9ec46bad..bb2821c8ca9 100644 --- a/absl/hash/internal/low_level_hash.h +++ b/absl/hash/internal/low_level_hash.h @@ -29,19 +29,26 @@ #include #include "absl/base/config.h" +#include "absl/base/optimization.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace hash_internal { +// Random data taken from the hexadecimal digits of Pi's fractional component. +// https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number +ABSL_CACHELINE_ALIGNED static constexpr uint64_t kStaticRandomData[] = { + 0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0, + 0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, +}; + // Hash function for a byte array. A 64-bit seed and a set of five 64-bit // integers are hashed into the result. The length must be greater than 32. 
// // To allow all hashable types (including string_view and Span) to depend on // this algorithm, we keep the API low-level, with as few dependencies as // possible. -uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed, - const uint64_t salt[5]); +uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed); } // namespace hash_internal ABSL_NAMESPACE_END diff --git a/absl/hash/internal/low_level_hash_test.cc b/absl/hash/internal/low_level_hash_test.cc index d370dc7b38b..fcfa6ebfa02 100644 --- a/absl/hash/internal/low_level_hash_test.cc +++ b/absl/hash/internal/low_level_hash_test.cc @@ -14,7 +14,7 @@ #include "absl/hash/internal/low_level_hash.h" -#include +#include #include #include "gmock/gmock.h" @@ -25,10 +25,6 @@ namespace { -static const uint64_t kSalt[5] = {0xa0761d6478bd642f, 0xe7037ed1a0b428dbl, - 0x8ebc6af09c88c6e3, 0x589965cc75374cc3l, - 0x1d8e4e27c47d124f}; - TEST(LowLevelHashTest, VerifyGolden) { constexpr size_t kNumGoldenOutputs = 94; static struct { @@ -366,38 +362,38 @@ TEST(LowLevelHashTest, VerifyGolden) { GTEST_SKIP() << "We only maintain golden data for little endian systems."; #else constexpr uint64_t kGolden[kNumGoldenOutputs] = { - 0x59b1542b0ff6b7b8, 0x3fb979d297096db9, 0xb391802c536343a9, - 0x94e0f7e4331081c4, 0x234d95e49e3ce30e, 0xca6351a3e568ed17, - 0xa62fcf7fa334293d, 0xb03111035f546067, 0x97b8c861e013d558, - 0xb6683803d9387949, 0xce5d907e0b3cb6a1, 0xab7466fae53ed201, - 0x8f13ca3f1cac3edd, 0xa2684a99cd909a2a, 0x03194f86b9440843, - 0xab3a745d96f75a66, 0xef2448606760ec3d, 0xd999e03247d5d5c5, - 0x4a25ab345d53f926, 0xa511b829ce9fc919, 0x4b76517f8e806cbf, - 0x006efd7ee09ff8d4, 0x790a4978bd0170a1, 0xc14f6e4b2dff057e, - 0xe0d2f4ae7c836d09, 0x4e2038a491ed939d, 0x23fd6f408e9598e0, - 0xa91cf8f1d92bcb08, 0x555cdec06df49d58, 0xe7d3e14bd6a8f3bd, - 0x4fdd25c1e75c009a, 0x3dffb8acf1ffbd17, 0x56946f33ed73a705, - 0x154c633d7690f3b0, 0x3e96f8e9a58a04e0, 0xb0279b244d3ccf9c, - 0x8571e87c882b2142, 0x9d9ada45132e7b41, 
0xd5667655533f1dec, - 0x70607ace4ec36463, 0x691418d2eb63116c, 0xa70179d8e7142980, - 0xf8388d756bea25a7, 0xe5127c736d9826de, 0x7f1c95f9b6b656b6, - 0x66ab835b7bf4c7b3, 0xc03423b9a6db9728, 0xe88415a2b416b76d, - 0x8afd8c14d0b56c36, 0xe9a252b3ba217dad, 0x710150f5cd87a9ff, - 0xd66b147837fad9ae, 0x1af5f8ffbaa717a7, 0xe01f88d7a9a8ac17, - 0xd67870a7251fde72, 0xf32b837f845a676b, 0x0827717b1ffe59f7, - 0x80307212ca7645fb, 0xf0d22af71ea57c80, 0x459373765f2c114b, - 0x54d26109fab9cbaf, 0xc603da4e257b93db, 0x57fa334b5689d7d5, - 0x41cd1b2a8a91f620, 0xe1d6e7cd0fb015af, 0x8608e9035eb9d795, - 0x45c7b9fae739fee1, 0x9f5ae4f7a6b597ee, 0xfb771b6e0017757d, - 0x8dac6d29cfd8d027, 0x3c9ba4fb62ce6508, 0xa971fad8243844a7, - 0xd2126f49b2ea3b64, 0x5dd78fe7ac436861, 0xfe4004a6bb3494a8, - 0xe7c01cc63d770d7c, 0xa117075b8c801d37, 0xdf1dfe75f0e73069, - 0x7285b39700cefb98, 0x5e97ea1aa9a670eb, 0xe21872db2b9137a3, - 0x12630b02c6ca405e, 0xfe1f2d802151f97a, 0xb53b0ed3dea4fb02, - 0xc6d5ed56d1dbf9fd, 0xe5b92b558a5c70cb, 0xccd6eedf97277d08, - 0x08582fff2e1494ed, 0xa41f2b3d17f1c4c7, 0x29ec07e5ef950f3d, - 0x96aba32565a97084, 0xf26870eca10cebcd, 0xbe1432feb4d33361, - 0x21993a779845e6eb, + 0x669da02f8d009e0f, 0xceb19bf2255445cd, 0x0e746992d6d43a7c, + 0x41ed623b9dcc5fde, 0x187a5a30d7c72edc, 0x949ae2a9c1eb925a, + 0x7e9c76a7b7c35e68, 0x4f96bf15b8309ff6, 0x26c0c1fde233732e, + 0xb0453f72aa151615, 0xf24b621a9ce9fece, 0x99ed798408687b5f, + 0x3b13ec1221423b66, 0xc67cf148a28afe59, 0x22f7e0173f92e3fa, + 0x14186c5fda6683a0, 0x97d608caa2603b2c, 0xfde3b0bbba24ffa9, + 0xb7068eb48c472c77, 0x9e34d72866b9fda0, 0xbbb99c884cdef88e, + 0x81d3e01f472a8a1a, 0xf84f506b3b60366d, 0xfe3f42f01300db37, + 0xe385712a51c1f836, 0x41dfd5e394245c79, 0x60855dbedadb900a, + 0xbdb4c0aa38567476, 0x9748802e8eec02cc, 0x5ced256d257f88de, + 0x55acccdf9a80f155, 0xa64b55b071afbbea, 0xa205bfe6c724ce4d, + 0x69dd26ca8ac21744, 0xef80e2ff2f6a9bc0, 0xde266c0baa202c20, + 0xfa3463080ac74c50, 0x379d968a40125c2b, 0x4cbbd0a7b3c7d648, + 0xc92afd93f4c665d2, 
0x6e28f5adb7ae38dc, 0x7c689c9c237be35e, + 0xaea41b29bd9d0f73, 0x832cef631d77e59f, 0x70cac8e87bc37dd3, + 0x8e8c98bbde68e764, 0xd6117aeb3ddedded, 0xd796ab808e766240, + 0x8953d0ea1a7d9814, 0xa212eba4281b391c, 0x21a555a8939ce597, + 0x809d31660f6d81a8, 0x2356524b20ab400f, 0x5bc611e1e49d0478, + 0xba9c065e2f385ce2, 0xb0a0fd12f4e83899, 0x14d076a35b1ff2ca, + 0x8acd0bb8cf9a93c0, 0xe62e8ec094039ee4, 0x38a536a7072bdc61, + 0xca256297602524f8, 0xfc62ebfb3530caeb, 0x8d8b0c05520569f6, + 0xbbaca65cf154c59d, 0x3739b5ada7e338d3, 0xdb9ea31f47365340, + 0x410b5c9c1da56755, 0x7e0abc03dbd10283, 0x136f87be70ed442e, + 0x6b727d4feddbe1e9, 0x074ebb21183b01df, 0x3fe92185b1985484, + 0xc5d8efd3c68305ca, 0xd9bada21b17e272e, 0x64d73133e1360f83, + 0xeb8563aa993e21f9, 0xe5e8da50cceab28f, 0x7a6f92eb3223d2f3, + 0xbdaf98370ea9b31b, 0x1682a84457f077bc, 0x4abd2d33b6e3be37, + 0xb35bc81a7c9d4c04, 0x3e5bde3fb7cfe63d, 0xff3abe6e2ffec974, + 0xb8116dd26cf6feec, 0x7a77a6e4ed0cf081, 0xb71eec2d5a184316, + 0x6fa932f77b4da817, 0x795f79b33909b2c4, 0x1b8755ef6b5eb34e, + 0x2255b72d7d6b2d79, 0xf2bdafafa90bd50a, 0x442a578f02cb1fc8, + 0xc25aefe55ecf83db, }; #endif @@ -408,7 +404,7 @@ TEST(LowLevelHashTest, VerifyGolden) { ASSERT_TRUE(absl::Base64Unescape(cases[i].base64_data, &str)); ASSERT_GT(str.size(), 32); uint64_t h = absl::hash_internal::LowLevelHashLenGt32( - str.data(), str.size(), cases[i].seed, kSalt); + str.data(), str.size(), cases[i].seed); printf("0x%016" PRIx64 ", ", h); if (i % 3 == 2) { printf("\n"); @@ -424,7 +420,7 @@ TEST(LowLevelHashTest, VerifyGolden) { ASSERT_TRUE(absl::Base64Unescape(cases[i].base64_data, &str)); ASSERT_GT(str.size(), 32); EXPECT_EQ(absl::hash_internal::LowLevelHashLenGt32(str.data(), str.size(), - cases[i].seed, kSalt), + cases[i].seed), kGolden[i]); } #endif From bc354a995cddb88db334c7c22f63e6be537c2e8a Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Wed, 14 May 2025 07:57:32 -0700 Subject: [PATCH 030/107] Avoid subtracting `it.control()` and `table.control()` in single 
element table during erase. `it.control()` is pointing to `kSooControl` global buffer. The result of subtraction is not used, but that causes UBSAN pointer overflow error. PiperOrigin-RevId: 758683515 Change-Id: Ifa9e3f7f2da3de371dd65f3e1d42c77c20aece59 --- absl/container/internal/raw_hash_set.cc | 7 ++++--- absl/container/internal/raw_hash_set.h | 8 +++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index cea225e0dbb..d91b0211ec9 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -526,9 +526,8 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline void InitializeThreeElementsControlBytes( } // namespace -void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { - ABSL_SWISSTABLE_ASSERT(IsFull(c.control()[index]) && - "erasing a dangling iterator"); +void EraseMetaOnly(CommonFields& c, const ctrl_t* ctrl, size_t slot_size) { + ABSL_SWISSTABLE_ASSERT(IsFull(*ctrl) && "erasing a dangling iterator"); c.decrement_size(); c.infoz().RecordErase(); @@ -538,6 +537,8 @@ void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) { return; } + size_t index = static_cast(ctrl - c.control()); + if (WasNeverFull(c, index)) { SetCtrl(c, index, ctrl_t::kEmpty, slot_size); c.growth_info().OverwriteFullAsEmpty(); diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index f5fdf66a22f..0be990212b1 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1847,7 +1847,7 @@ void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy, void* alloc, bool reuse, bool soo_enabled); // Type-erased version of raw_hash_set::erase_meta_only. -void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size); +void EraseMetaOnly(CommonFields& c, const ctrl_t* ctrl, size_t slot_size); // For trivially relocatable types we use memcpy directly. 
This allows us to // share the same function body for raw_hash_set instantiations that have the @@ -3140,8 +3140,7 @@ class raw_hash_set { common().set_empty_soo(); return; } - EraseMetaOnly(common(), static_cast(it.control() - control()), - sizeof(slot_type)); + EraseMetaOnly(common(), it.control(), sizeof(slot_type)); } template @@ -3691,8 +3690,7 @@ struct HashtableFreeFunctionsAccess { auto* slot = static_cast(slot_void); if (pred(Set::PolicyTraits::element(slot))) { c->destroy(slot); - EraseMetaOnly(c->common(), static_cast(ctrl - c->control()), - sizeof(*slot)); + EraseMetaOnly(c->common(), ctrl, sizeof(*slot)); ++num_deleted; } }); From fdf821ec60ac42e489ee0e81c330e8ebe027ce24 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Wed, 14 May 2025 12:13:57 -0700 Subject: [PATCH 031/107] Add tags to skip some tests under UBSAN. PiperOrigin-RevId: 758783019 Change-Id: I3b5d6a7cce9a001a7dbc3edb9dd5c7e9132b33dd --- absl/hash/BUILD.bazel | 2 ++ absl/synchronization/BUILD.bazel | 1 + 2 files changed, 3 insertions(+) diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index b2ffcd08d5d..8176cd92a0f 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -82,6 +82,8 @@ cc_test( ], copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, + # TODO(b/417700722): Fix HashValueTest.PointerAlignment reporting more collisions under ubsan. + tags = ["noubsan"], deps = [ ":hash", ":hash_testing", diff --git a/absl/synchronization/BUILD.bazel b/absl/synchronization/BUILD.bazel index 920928e2328..5c490121f6e 100644 --- a/absl/synchronization/BUILD.bazel +++ b/absl/synchronization/BUILD.bazel @@ -360,6 +360,7 @@ cc_test( linkopts = ABSL_DEFAULT_LINKOPTS, tags = [ "no_test_wasm", + "noubsan", # TODO(b/417700722): timeouts under UBSAN. 
], deps = [ ":per_thread_sem_test_common", From 48e429c98619822da19d57699410b85df2988f73 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Wed, 14 May 2025 13:27:35 -0700 Subject: [PATCH 032/107] Revert- Doesn't actually work because SWIG doesn't use the full preprocessor PiperOrigin-RevId: 758810429 Change-Id: Ic75609eacf736712851605d346b4c6ba1dc6cc1b --- absl/base/macros.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/absl/base/macros.h b/absl/base/macros.h index f9acdc8ca9e..ff89944ae4c 100644 --- a/absl/base/macros.h +++ b/absl/base/macros.h @@ -197,9 +197,9 @@ ABSL_NAMESPACE_END // While open-source users do not have access to this service, the macro is // provided for compatibility, and so that users receive deprecation warnings. #if ABSL_HAVE_CPP_ATTRIBUTE(deprecated) && \ - ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) && !defined(SWIG) + ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) #define ABSL_DEPRECATE_AND_INLINE() [[deprecated, clang::annotate("inline-me")]] -#elif ABSL_HAVE_CPP_ATTRIBUTE(deprecated) && !defined(SWIG) +#elif ABSL_HAVE_CPP_ATTRIBUTE(deprecated) #define ABSL_DEPRECATE_AND_INLINE() [[deprecated]] #else #define ABSL_DEPRECATE_AND_INLINE() From 2fe3c4b14efcd2cfd2b9ef186f2c7fde66f29e8e Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 15 May 2025 00:45:28 -0700 Subject: [PATCH 033/107] Indent absl container examples consistently PiperOrigin-RevId: 759023113 Change-Id: I84d18d16abddf70a00160e2bc3c25d5a90f52e79 --- absl/container/btree_map.h | 8 ++++---- absl/container/btree_set.h | 8 ++++---- absl/container/flat_hash_map.h | 26 +++++++++++++------------- absl/container/flat_hash_set.h | 22 +++++++++++----------- absl/container/node_hash_map.h | 26 +++++++++++++------------- absl/container/node_hash_set.h | 22 +++++++++++----------- 6 files changed, 56 insertions(+), 56 deletions(-) diff --git a/absl/container/btree_map.h b/absl/container/btree_map.h index 32a82ef062c..131f622fef5 100644 --- a/absl/container/btree_map.h +++ 
b/absl/container/btree_map.h @@ -117,8 +117,8 @@ class ABSL_ATTRIBUTE_OWNER btree_map // // * Copy assignment operator // - // absl::btree_map map4; - // map4 = map3; + // absl::btree_map map4; + // map4 = map3; // // * Move constructor // @@ -555,8 +555,8 @@ class ABSL_ATTRIBUTE_OWNER btree_multimap // // * Copy assignment operator // - // absl::btree_multimap map4; - // map4 = map3; + // absl::btree_multimap map4; + // map4 = map3; // // * Move constructor // diff --git a/absl/container/btree_set.h b/absl/container/btree_set.h index 16181de577f..44a39cf8071 100644 --- a/absl/container/btree_set.h +++ b/absl/container/btree_set.h @@ -119,8 +119,8 @@ class ABSL_ATTRIBUTE_OWNER btree_set // // * Copy assignment operator // - // absl::btree_set set4; - // set4 = set3; + // absl::btree_set set4; + // set4 = set3; // // * Move constructor // @@ -475,8 +475,8 @@ class ABSL_ATTRIBUTE_OWNER btree_multiset // // * Copy assignment operator // - // absl::btree_multiset set4; - // set4 = set3; + // absl::btree_multiset set4; + // set4 = set3; // // * Move constructor // diff --git a/absl/container/flat_hash_map.h b/absl/container/flat_hash_map.h index bc86ced9971..5fa502328d2 100644 --- a/absl/container/flat_hash_map.h +++ b/absl/container/flat_hash_map.h @@ -115,18 +115,18 @@ struct FlatHashMapPolicy; // absl::flat_hash_map ducks = // {{"a", "huey"}, {"b", "dewey"}, {"c", "louie"}}; // -// // Insert a new element into the flat hash map -// ducks.insert({"d", "donald"}); +// // Insert a new element into the flat hash map +// ducks.insert({"d", "donald"}); // -// // Force a rehash of the flat hash map -// ducks.rehash(0); +// // Force a rehash of the flat hash map +// ducks.rehash(0); // -// // Find the element with the key "b" -// std::string search_key = "b"; -// auto result = ducks.find(search_key); -// if (result != ducks.end()) { -// std::cout << "Result: " << result->second << std::endl; -// } +// // Find the element with the key "b" +// std::string search_key = "b"; +// 
auto result = ducks.find(search_key); +// if (result != ducks.end()) { +// std::cout << "Result: " << result->second << std::endl; +// } template , class Eq = DefaultHashContainerEq, class Allocator = std::allocator>> @@ -158,9 +158,9 @@ class ABSL_ATTRIBUTE_OWNER flat_hash_map // // * Copy assignment operator // - // // Hash functor and Comparator are copied as well - // absl::flat_hash_map map4; - // map4 = map3; + // // Hash functor and Comparator are copied as well + // absl::flat_hash_map map4; + // map4 = map3; // // * Move constructor // diff --git a/absl/container/flat_hash_set.h b/absl/container/flat_hash_set.h index bf63eb59867..bc1ceb17e63 100644 --- a/absl/container/flat_hash_set.h +++ b/absl/container/flat_hash_set.h @@ -114,16 +114,16 @@ struct FlatHashSetPolicy; // absl::flat_hash_set ducks = // {"huey", "dewey", "louie"}; // -// // Insert a new element into the flat hash set -// ducks.insert("donald"); +// // Insert a new element into the flat hash set +// ducks.insert("donald"); // -// // Force a rehash of the flat hash set -// ducks.rehash(0); +// // Force a rehash of the flat hash set +// ducks.rehash(0); // -// // See if "dewey" is present -// if (ducks.contains("dewey")) { -// std::cout << "We found dewey!" << std::endl; -// } +// // See if "dewey" is present +// if (ducks.contains("dewey")) { +// std::cout << "We found dewey!" 
<< std::endl; +// } template , class Eq = DefaultHashContainerEq, class Allocator = std::allocator> @@ -154,9 +154,9 @@ class ABSL_ATTRIBUTE_OWNER flat_hash_set // // * Copy assignment operator // - // // Hash functor and Comparator are copied as well - // absl::flat_hash_set set4; - // set4 = set3; + // // Hash functor and Comparator are copied as well + // absl::flat_hash_set set4; + // set4 = set3; // // * Move constructor // diff --git a/absl/container/node_hash_map.h b/absl/container/node_hash_map.h index 8aed18b2e61..5f6be95acb9 100644 --- a/absl/container/node_hash_map.h +++ b/absl/container/node_hash_map.h @@ -110,18 +110,18 @@ class NodeHashMapPolicy; // absl::node_hash_map ducks = // {{"a", "huey"}, {"b", "dewey"}, {"c", "louie"}}; // -// // Insert a new element into the node hash map -// ducks.insert({"d", "donald"}}; +// // Insert a new element into the node hash map +// ducks.insert({"d", "donald"}}; // -// // Force a rehash of the node hash map -// ducks.rehash(0); +// // Force a rehash of the node hash map +// ducks.rehash(0); // -// // Find the element with the key "b" -// std::string search_key = "b"; -// auto result = ducks.find(search_key); -// if (result != ducks.end()) { -// std::cout << "Result: " << result->second << std::endl; -// } +// // Find the element with the key "b" +// std::string search_key = "b"; +// auto result = ducks.find(search_key); +// if (result != ducks.end()) { +// std::cout << "Result: " << result->second << std::endl; +// } template , class Eq = DefaultHashContainerEq, class Alloc = std::allocator>> @@ -153,9 +153,9 @@ class ABSL_ATTRIBUTE_OWNER node_hash_map // // * Copy assignment operator // - // // Hash functor and Comparator are copied as well - // absl::node_hash_map map4; - // map4 = map3; + // // Hash functor and Comparator are copied as well + // absl::node_hash_map map4; + // map4 = map3; // // * Move constructor // diff --git a/absl/container/node_hash_set.h b/absl/container/node_hash_set.h index 
6240e2d9154..127c64008ba 100644 --- a/absl/container/node_hash_set.h +++ b/absl/container/node_hash_set.h @@ -108,16 +108,16 @@ struct NodeHashSetPolicy; // absl::node_hash_set ducks = // {"huey", "dewey", "louie"}; // -// // Insert a new element into the node hash set -// ducks.insert("donald"); +// // Insert a new element into the node hash set +// ducks.insert("donald"); // -// // Force a rehash of the node hash set -// ducks.rehash(0); +// // Force a rehash of the node hash set +// ducks.rehash(0); // -// // See if "dewey" is present -// if (ducks.contains("dewey")) { -// std::cout << "We found dewey!" << std::endl; -// } +// // See if "dewey" is present +// if (ducks.contains("dewey")) { +// std::cout << "We found dewey!" << std::endl; +// } template , class Eq = DefaultHashContainerEq, class Alloc = std::allocator> class ABSL_ATTRIBUTE_OWNER node_hash_set @@ -147,9 +147,9 @@ class ABSL_ATTRIBUTE_OWNER node_hash_set // // * Copy assignment operator // - // // Hash functor and Comparator are copied as well - // absl::node_hash_set set4; - // set4 = set3; + // // Hash functor and Comparator are copied as well + // absl::node_hash_set set4; + // set4 = set3; // // * Move constructor // From f28774a28227c4e30041616bff4aa0120ed724c0 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Thu, 15 May 2025 01:37:12 -0700 Subject: [PATCH 034/107] Avoid mixing after `Hash64` calls for long strings by passing `state` instead of `Seed` to low level hash. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` name old INSTRUCTIONS/op new INSTRUCTIONS/op delta BM_latency_AbslHash_Int32 9.00 ± 0% 9.00 ± 0% ~ (all samples are equal) BM_latency_AbslHash_Int64 9.00 ± 0% 9.00 ± 0% ~ (all samples are equal) BM_latency_AbslHash_String3 32.0 ± 0% 32.0 ± 0% ~ (all samples are equal) BM_latency_AbslHash_String5 30.9 ± 3% 31.0 ± 4% ~ (p=0.328 n=54+56) BM_latency_AbslHash_String9 29.0 ± 4% 28.9 ± 3% ~ (p=0.413 n=55+53) BM_latency_AbslHash_String17 27.0 ± 3% 27.4 ± 2% +1.74% (p=0.000 n=53+50) BM_latency_AbslHash_String33 32.5 ± 5% 32.9 ± 6% +1.23% (p=0.002 n=55+52) BM_latency_AbslHash_String65 63.6 ±14% 55.9 ±11% -12.02% (p=0.000 n=56+51) BM_latency_AbslHash_String257 134 ±10% 122 ±10% -8.91% (p=0.000 n=52+49) name old CYCLES/op new CYCLES/op delta BM_latency_AbslHash_Int32 16.1 ± 2% 16.0 ± 4% ~ (p=0.108 n=51+56) BM_latency_AbslHash_Int64 16.5 ± 4% 16.4 ± 5% ~ (p=0.074 n=54+54) BM_latency_AbslHash_String3 22.7 ± 1% 22.6 ± 0% -0.50% (p=0.000 n=54+53) BM_latency_AbslHash_String5 22.9 ± 8% 22.5 ± 7% -2.08% (p=0.011 n=56+55) BM_latency_AbslHash_String9 23.1 ±15% 22.1 ± 7% -4.62% (p=0.000 n=57+52) BM_latency_AbslHash_String17 21.7 ± 8% 21.4 ± 3% -1.54% (p=0.001 n=55+55) BM_latency_AbslHash_String33 23.5 ± 4% 23.6 ± 5% ~ (p=0.206 n=53+53) BM_latency_AbslHash_String65 32.5 ± 8% 28.8 ± 7% -11.29% (p=0.000 n=57+53) BM_latency_AbslHash_String257 51.7 ± 9% 46.5 ± 7% -9.93% (p=0.000 n=54+50) ``` PiperOrigin-RevId: 759037628 Change-Id: I22e8c8e777901906015a29377b0ebd9c33310cf0 --- absl/hash/internal/hash.cc | 2 +- absl/hash/internal/hash.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index d1765fb9a78..b185a0acfef 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -44,7 +44,7 @@ uint64_t MixingHashState::CombineLargeContiguousImpl32( uint64_t MixingHashState::CombineLargeContiguousImpl64( 
uint64_t state, const unsigned char* first, size_t len) { while (len >= PiecewiseChunkSize()) { - state = Mix(state ^ Hash64(first, PiecewiseChunkSize()), kMul); + state = Hash64(first, PiecewiseChunkSize(), state); len -= PiecewiseChunkSize(); first += PiecewiseChunkSize(); } diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index dfc9a6f73ff..f400c7be1a7 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -1324,12 +1324,13 @@ class ABSL_DLL MixingHashState : public HashStateBase { } ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data, - size_t len) { + size_t len, + uint64_t state) { #ifdef ABSL_HAVE_INTRINSIC_INT128 - return LowLevelHashLenGt32(data, len, Seed()); + return LowLevelHashLenGt32(data, len, state); #else return hash_internal::CityHash64WithSeed( - reinterpret_cast(data), len, Seed()); + reinterpret_cast(data), len, state); #endif } @@ -1399,7 +1400,7 @@ inline uint64_t MixingHashState::CombineContiguousImpl( return CombineContiguousImpl17to32(state, first, len); } if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { - return Mix(state ^ Hash64(first, len), kMul); + return Hash64(first, len, state); } return CombineLargeContiguousImpl64(state, first, len); } From be5661825b1172d55c190a087ceb8907187d523a Mon Sep 17 00:00:00 2001 From: Ben Beasley Date: Thu, 15 May 2025 08:14:53 -0700 Subject: [PATCH 035/107] PR #1888: Adjust Table.GrowExtremelyLargeTable to avoid OOM on i386 Imported from GitHub PR https://github.com/abseil/abseil-cpp/pull/1888 While this only covers `i386`/`i686`, which is the motivation for this PR, this test can be expected to OOM on any 32-bit platform. For now, this is the minimal change that avoids the problem [in the Fedora package](https://src.fedoraproject.org/rpms/abseil-cpp/). This fixes one of the two test failures reported in https://github.com/abseil/abseil-cpp/issues/1887. 
Merge 395acb74da05fa35d924059a170ebd8267356b65 into f28774a28227c4e30041616bff4aa0120ed724c0 Merging this change closes #1888 COPYBARA_INTEGRATE_REVIEW=https://github.com/abseil/abseil-cpp/pull/1888 from musicinmybrain:extremely-large-table-32-bit 395acb74da05fa35d924059a170ebd8267356b65 PiperOrigin-RevId: 759154889 Change-Id: I0a105fc42c51898c277b4a056ccd6599b43e1a50 --- absl/container/internal/raw_hash_set_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index a5cbd44d3b2..2c55d0fc079 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -4267,8 +4267,8 @@ struct ConstUint8Hash { // 5. Finally we will catch up and go to overflow codepath. TEST(Table, GrowExtremelyLargeTable) { constexpr size_t kTargetCapacity = -#if defined(__wasm__) || defined(__asmjs__) - NextCapacity(ProbedItem4Bytes::kMaxNewCapacity); // OOMs on WASM. +#if defined(__wasm__) || defined(__asmjs__) || defined(__i386__) + NextCapacity(ProbedItem4Bytes::kMaxNewCapacity); // OOMs on WASM, 32-bit. #else NextCapacity(ProbedItem8Bytes::kMaxNewCapacity); #endif From 41a1f434d40bc6089acaf026b00c5f8f917e8706 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Thu, 15 May 2025 13:26:50 -0700 Subject: [PATCH 036/107] Avoid allocating control bytes in capacity==1 swisstables. 
PiperOrigin-RevId: 759275918 Change-Id: Id4e88ae026e3ebee04f5bdb28a7191e54aaea1e9 --- absl/container/internal/raw_hash_set.cc | 8 ++++---- absl/container/internal/raw_hash_set.h | 2 +- absl/container/internal/raw_hash_set_test.cc | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index d91b0211ec9..2a08e34d1e8 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -443,6 +443,7 @@ void ResetCtrl(CommonFields& common, size_t slot_size) { ctrl_t* ctrl = common.control(); static constexpr size_t kTwoGroupCapacity = 2 * Group::kWidth - 1; if (ABSL_PREDICT_TRUE(capacity <= kTwoGroupCapacity)) { + if (IsSmallCapacity(capacity)) return; std::memset(ctrl, static_cast(ctrl_t::kEmpty), Group::kWidth); std::memset(ctrl + capacity, static_cast(ctrl_t::kEmpty), Group::kWidth); @@ -591,13 +592,14 @@ size_t FindNewPositionsAndTransferSlots( const auto insert_slot = [&](void* slot) { size_t hash = policy.hash_slot(hash_fn, slot); - auto target = find_first_non_full(common, hash); + FindInfo target = + common.is_small() ? FindInfo{0, 0} : find_first_non_full(common, hash); SetCtrl(common, target.offset, H2(hash), slot_size); policy.transfer_n(&common, SlotAddress(new_slots, target.offset, slot_size), slot, 1); return target.probe_length; }; - if (old_capacity == 1) { + if (IsSmallCapacity(old_capacity)) { if (common.size() == 1) insert_slot(old_slots); return 0; } @@ -1343,8 +1345,6 @@ void SmallEmptyNonSooPrepareInsert(CommonFields& common, const bool has_infoz = infoz.IsSampled(); void* alloc = policy.get_char_alloc(common); - // TODO(b/413062340): don't allocate control bytes for capacity 1 tables. We - // don't use the control bytes in this case. 
const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, kNewCapacity, has_infoz, alloc); common.set_control(new_ctrl); diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 0be990212b1..41294b85d14 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -796,7 +796,7 @@ constexpr size_t NumClonedBytes() { return Group::kWidth - 1; } // Returns the number of control bytes including cloned. constexpr size_t NumControlBytes(size_t capacity) { - return capacity + 1 + NumClonedBytes(); + return IsSmallCapacity(capacity) ? 0 : capacity + 1 + NumClonedBytes(); } // Computes the offset from the start of the backing allocation of control. diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 2c55d0fc079..48ad5fce947 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -1184,7 +1184,7 @@ TYPED_TEST(SmallTableResizeTest, InsertIntoSmallTable) { t.insert(i); ASSERT_EQ(t.size(), i + 1); for (int j = 0; j < i + 1; ++j) { - EXPECT_TRUE(t.find(j) != t.end()); + ASSERT_TRUE(t.find(j) != t.end()); EXPECT_EQ(*t.find(j), j); } } @@ -1207,7 +1207,7 @@ TYPED_TEST(SmallTableResizeTest, ResizeGrowSmallTables) { t.reserve(target_size); } for (size_t i = 0; i < source_size; ++i) { - EXPECT_TRUE(t.find(static_cast(i)) != t.end()); + ASSERT_TRUE(t.find(static_cast(i)) != t.end()); EXPECT_EQ(*t.find(static_cast(i)), static_cast(i)); } } @@ -1232,7 +1232,7 @@ TYPED_TEST(SmallTableResizeTest, ResizeReduceSmallTables) { << "rehash(0) must resize to the minimum capacity"; } for (size_t i = 0; i < inserted_count; ++i) { - EXPECT_TRUE(t.find(static_cast(i)) != t.end()); + ASSERT_TRUE(t.find(static_cast(i)) != t.end()); EXPECT_EQ(*t.find(static_cast(i)), static_cast(i)); } } From fc0b7a083b189e4dbe3c09ff948c8e4f5205d1af Mon Sep 17 00:00:00 2001 From: Jesse Rosenstock Date: Mon, 19 May 
2025 07:12:44 -0700 Subject: [PATCH 037/107] [bits] Add tests for return types With gcc <= 12, std::bit_width() returns T, not int, so make sure the absl:: equivalents return the correct type. https://github.com/abseil/abseil-cpp/issues/1890 PiperOrigin-RevId: 760612745 Change-Id: Ibbbe6eaa1aab677ecd747cf40765f6443eefe628 --- absl/numeric/bits_test.cc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/absl/numeric/bits_test.cc b/absl/numeric/bits_test.cc index 3b71cccf88a..2977976d198 100644 --- a/absl/numeric/bits_test.cc +++ b/absl/numeric/bits_test.cc @@ -26,16 +26,37 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace { +template +class UnsignedIntegerTypesTest : public ::testing::Test {}; template class IntegerTypesTest : public ::testing::Test {}; +using UnsignedIntegerTypes = + ::testing::Types; using OneByteIntegerTypes = ::testing::Types< unsigned char, uint8_t >; +TYPED_TEST_SUITE(UnsignedIntegerTypesTest, UnsignedIntegerTypes); TYPED_TEST_SUITE(IntegerTypesTest, OneByteIntegerTypes); +TYPED_TEST(UnsignedIntegerTypesTest, ReturnTypes) { + using UIntType = TypeParam; + + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); +} + TYPED_TEST(IntegerTypesTest, HandlesTypes) { using UIntType = TypeParam; From 7b61c35cde6a2e495c157f7202166d24291d2bb5 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 19 May 2025 11:10:45 -0700 Subject: [PATCH 038/107] Change kEmptyGroup to kDefaultIterControl now that it's only used for default-constructed iterators. 
PiperOrigin-RevId: 760696829 Change-Id: Ica318414c6562d16a3eac291c38eb2b171519c29 --- absl/container/internal/raw_hash_set.cc | 19 ++----------- absl/container/internal/raw_hash_set.h | 29 ++++++++------------ absl/container/internal/raw_hash_set_test.cc | 4 --- 3 files changed, 14 insertions(+), 38 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 2a08e34d1e8..d805f71dabe 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -40,20 +40,9 @@ namespace container_internal { // Represents a control byte corresponding to a full slot with arbitrary hash. constexpr ctrl_t ZeroCtrlT() { return static_cast(0); } -// We have space for `growth_info` before a single block of control bytes. A -// single block of empty control bytes for tables without any slots allocated. -// This enables removing a branch in the hot path of find(). In order to ensure -// that the control bytes are aligned to 16, we have 16 bytes before the control -// bytes even though growth_info only needs 8. -alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[32] = { - ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), - ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), - ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), - ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), - ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, - ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, - ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, - ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; +// A single control byte for default-constructed iterators. We leave it +// uninitialized because reading this memory is a bug. +ABSL_DLL ctrl_t kDefaultIterControl; // We need one full byte followed by a sentinel byte for iterator::operator++ to // work. 
We have a full group after kSentinel to be safe (in case operator++ is @@ -64,8 +53,6 @@ ABSL_CONST_INIT ABSL_DLL const ctrl_t kSooControl[17] = { ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; -static_assert(NumControlBytes(SooCapacity()) <= 17, - "kSooControl capacity too small"); namespace { diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 41294b85d14..7d10dfa45ab 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -380,10 +380,7 @@ constexpr bool IsNoThrowSwappable(std::false_type /* is_swappable */) { return false; } -// See definition comment for why this is size 32. -// TODO(b/413062340): we can probably reduce this to 16 now that it's only used -// for default-constructed iterators. -ABSL_DLL extern const ctrl_t kEmptyGroup[32]; +ABSL_DLL extern ctrl_t kDefaultIterControl; // We use these sentinel capacity values in debug mode to indicate different // classes of bugs. @@ -397,13 +394,9 @@ enum InvalidCapacity : size_t { kSelfMovedFrom, }; -// Returns a pointer to a control byte group that can be used by -// default-constructed iterators. -inline ctrl_t* EmptyGroup() { - // Const must be cast away here; no uses of this function will actually write - // to it because it is only used for default-constructed iterators. - return const_cast(kEmptyGroup + 16); -} +// Returns a pointer to a control byte that can be used by default-constructed +// iterators. We don't expect this pointer to be dereferenced. +inline ctrl_t* DefaultIterControl() { return &kDefaultIterControl; } // For use in SOO iterators. 
// TODO(b/289225379): we could potentially get rid of this by adding an is_soo @@ -1269,7 +1262,7 @@ inline void AssertIsFull(const ctrl_t* ctrl, GenerationType generation, if (ABSL_PREDICT_FALSE(ctrl == nullptr)) { ABSL_RAW_LOG(FATAL, "%s called on end() iterator.", operation); } - if (ABSL_PREDICT_FALSE(ctrl == EmptyGroup())) { + if (ABSL_PREDICT_FALSE(ctrl == DefaultIterControl())) { ABSL_RAW_LOG(FATAL, "%s called on default-constructed iterator.", operation); } @@ -1304,7 +1297,7 @@ inline void AssertIsValidForComparison(const ctrl_t* ctrl, const GenerationType* generation_ptr) { if (!SwisstableDebugEnabled()) return; const bool ctrl_is_valid_for_comparison = - ctrl == nullptr || ctrl == EmptyGroup() || IsFull(*ctrl); + ctrl == nullptr || ctrl == DefaultIterControl() || IsFull(*ctrl); if (SwisstableGenerationsEnabled()) { if (ABSL_PREDICT_FALSE(generation != *generation_ptr)) { ABSL_RAW_LOG(FATAL, @@ -1370,8 +1363,8 @@ inline void AssertSameContainer(const ctrl_t* ctrl_a, const ctrl_t* ctrl_b, } }; - const bool a_is_default = ctrl_a == EmptyGroup(); - const bool b_is_default = ctrl_b == EmptyGroup(); + const bool a_is_default = ctrl_a == DefaultIterControl(); + const bool b_is_default = ctrl_b == DefaultIterControl(); if (a_is_default && b_is_default) return; fail_if(a_is_default != b_is_default, "Comparing default-constructed hashtable iterator with a " @@ -2110,9 +2103,9 @@ class raw_hash_set { ctrl_t* control() const { return ctrl_; } slot_type* slot() const { return slot_; } - // We use EmptyGroup() for default-constructed iterators so that they can - // be distinguished from end iterators, which have nullptr ctrl_. - ctrl_t* ctrl_ = EmptyGroup(); + // We use DefaultIterControl() for default-constructed iterators so that + // they can be distinguished from end iterators, which have nullptr ctrl_. + ctrl_t* ctrl_ = DefaultIterControl(); // To avoid uninitialized member warnings, put slot_ in an anonymous union. 
// The member is not initialized on singleton and end iterators. union { diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 48ad5fce947..6da336000b5 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -423,10 +423,6 @@ TEST(BitMask, LeadingTrailing) { EXPECT_EQ((BitMask(0x8000000000000000).TrailingZeros()), 7); } -TEST(Group, EmptyGroup) { - for (h2_t h = 0; h != 128; ++h) EXPECT_FALSE(Group{EmptyGroup()}.Match(h)); -} - TEST(Group, Match) { if (Group::kWidth == 16) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), From 01acaa9089c2b203cbfd4d5ef4d302d3a12c24b6 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Tue, 20 May 2025 04:02:12 -0700 Subject: [PATCH 039/107] Take `get_hash` implementation out of the SwissTable class to minimize number of instantiations. Tables with the same key and hash would have the same instantiation. PiperOrigin-RevId: 761014089 Change-Id: I2eafb43b440f54127ffa48aaf18e9a03eeca888d --- absl/container/BUILD.bazel | 1 + absl/container/CMakeLists.txt | 1 + absl/container/internal/container_memory.h | 52 ++++++++++++++++- absl/container/internal/hash_policy_traits.h | 12 +--- absl/container/internal/raw_hash_set.h | 61 +++++--------------- 5 files changed, 67 insertions(+), 60 deletions(-) diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index 61e816fc83a..34ac288f18a 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -530,6 +530,7 @@ cc_library( linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":common_policy_traits", + ":container_memory", "//absl/meta:type_traits", ], ) diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt index d8cd7d08984..edc4a827099 100644 --- a/absl/container/CMakeLists.txt +++ b/absl/container/CMakeLists.txt @@ -583,6 +583,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::container_memory 
absl::common_policy_traits absl::meta PUBLIC diff --git a/absl/container/internal/container_memory.h b/absl/container/internal/container_memory.h index e7ac1dba43b..ed7b90b169a 100644 --- a/absl/container/internal/container_memory.h +++ b/absl/container/internal/container_memory.h @@ -464,6 +464,54 @@ struct map_slot_policy { } }; +// Suppress erroneous uninitialized memory errors on GCC. For example, GCC +// thinks that the call to slot_array() in find_or_prepare_insert() is reading +// uninitialized memory, but slot_array is only called there when the table is +// non-empty and this memory is initialized when the table is non-empty. +#if !defined(__clang__) && defined(__GNUC__) +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") \ + _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") x; \ + _Pragma("GCC diagnostic pop") +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) \ + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(return x) +#else +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) x +#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) return x +#endif + +// Variadic arguments hash function that ignore the rest of the arguments. +// Useful for usage with policy traits. +template +struct HashElement { + template + size_t operator()(const K& key, Args&&...) const { + return h(key); + } + const Hash& h; +}; + +// No arguments function hash function for a specific key. +template +struct HashKey { + size_t operator()() const { return HashElement{hash}(key); } + const Hash& hash; + const Key& key; +}; + +// Variadic arguments equality function that ignore the rest of the arguments. +// Useful for usage with policy traits. +template +struct EqualElement { + template + bool operator()(const K2& lhs, Args&&...) 
const { + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(eq(lhs, rhs)); + } + const K1& rhs; + const KeyEqual& eq; +}; + // Type erased function for computing hash of the slot. using HashSlotFn = size_t (*)(const void* hash_fn, void* slot); @@ -472,7 +520,7 @@ using HashSlotFn = size_t (*)(const void* hash_fn, void* slot); template size_t TypeErasedApplyToSlotFn(const void* fn, void* slot) { const auto* f = static_cast(fn); - return (*f)(*static_cast(slot)); + return HashElement{*f}(*static_cast(slot)); } // Type erased function to apply `Fn` to data inside of the `*slot_ptr`. @@ -481,7 +529,7 @@ template size_t TypeErasedDerefAndApplyToSlotFn(const void* fn, void* slot_ptr) { const auto* f = static_cast(fn); const T* slot = *static_cast(slot_ptr); - return (*f)(*slot); + return HashElement{*f}(*slot); } } // namespace container_internal diff --git a/absl/container/internal/hash_policy_traits.h b/absl/container/internal/hash_policy_traits.h index cd6b42f9ec6..1d7c910af81 100644 --- a/absl/container/internal/hash_policy_traits.h +++ b/absl/container/internal/hash_policy_traits.h @@ -22,6 +22,7 @@ #include #include "absl/container/internal/common_policy_traits.h" +#include "absl/container/internal/container_memory.h" #include "absl/meta/type_traits.h" namespace absl { @@ -145,8 +146,6 @@ struct hash_policy_traits : common_policy_traits { return P::value(elem); } - using HashSlotFn = size_t (*)(const void* hash_fn, void* slot); - template static constexpr HashSlotFn get_hash_slot_fn() { // get_hash_slot_fn may return nullptr to signal that non type erased function @@ -168,15 +167,6 @@ struct hash_policy_traits : common_policy_traits { static constexpr bool soo_enabled() { return soo_enabled_impl(Rank1{}); } private: - template - struct HashElement { - template - size_t operator()(const K& key, Args&&...) 
const { - return h(key); - } - const Hash& h; - }; - template static size_t hash_slot_fn_non_type_erased(const void* hash_fn, void* slot) { return Policy::apply(HashElement{*static_cast(hash_fn)}, diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 7d10dfa45ab..fba69f47b48 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -845,23 +845,6 @@ class RawHashSetLayout { struct HashtableFreeFunctionsAccess; -// Suppress erroneous uninitialized memory errors on GCC. For example, GCC -// thinks that the call to slot_array() in find_or_prepare_insert() is reading -// uninitialized memory, but slot_array is only called there when the table is -// non-empty and this memory is initialized when the table is non-empty. -#if !defined(__clang__) && defined(__GNUC__) -#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") \ - _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") x; \ - _Pragma("GCC diagnostic pop") -#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) \ - ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(return x) -#else -#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) x -#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) return x -#endif - // This allows us to work around an uninitialized memory warning when // constructing begin() iterators in empty hashtables. template @@ -1820,6 +1803,7 @@ void GrowFullSooTableToNextCapacityForceSampling(CommonFields& common, size_t GrowToNextCapacityAndPrepareInsert(CommonFields& common, const PolicyFunctions& policy, size_t new_hash); + // When growing from capacity 0 to 1, we only need the hash if the table ends up // being sampled so don't compute it unless needed. 
void SmallEmptyNonSooPrepareInsert(CommonFields& common, @@ -2964,24 +2948,6 @@ class raw_hash_set { const raw_hash_set& s; }; - struct HashElement { - template - size_t operator()(const K& key, Args&&...) const { - return h(key); - } - const hasher& h; - }; - - template - struct EqualElement { - template - bool operator()(const K2& lhs, Args&&...) const { - ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(eq(lhs, rhs)); - } - const K1& rhs; - const key_equal& eq; - }; - struct EmplaceDecomposable { template std::pair operator()(const K& key, Args&&... args) const { @@ -3036,8 +3002,9 @@ class raw_hash_set { template iterator find_small(const key_arg& key) { ABSL_SWISSTABLE_ASSERT(is_small()); - return empty() || !PolicyTraits::apply(EqualElement{key, eq_ref()}, - PolicyTraits::element(single_slot())) + return empty() || !PolicyTraits::apply( + EqualElement{key, eq_ref()}, + PolicyTraits::element(single_slot())) ? end() : single_iterator(); } @@ -3055,7 +3022,7 @@ class raw_hash_set { Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(PolicyTraits::apply( - EqualElement{key, eq_ref()}, + EqualElement{key, eq_ref()}, PolicyTraits::element(slot_array() + seq.offset(i))))) return iterator_at(seq.offset(i)); } @@ -3138,10 +3105,10 @@ class raw_hash_set { template size_t hash_of(const K& key) const { - return hash_ref()(key); + return HashElement{hash_ref()}(key); } size_t hash_of(slot_type* slot) const { - return PolicyTraits::apply(HashElement{hash_ref()}, + return PolicyTraits::apply(HashElement{hash_ref()}, PolicyTraits::element(slot)); } @@ -3260,7 +3227,7 @@ class raw_hash_set { if (empty()) { if (!SooEnabled()) { SmallEmptyNonSooPrepareInsert(common(), GetPolicyFunctions(), - [&] { return hash_of(key); }); + HashKey{hash_ref(), key}); return {single_iterator(), true}; } if (!should_sample_soo()) { @@ -3268,7 +3235,7 @@ class raw_hash_set { return {single_iterator(), true}; } soo_slot_ctrl = ctrl_t::kEmpty; - } else if 
(PolicyTraits::apply(EqualElement{key, eq_ref()}, + } else if (PolicyTraits::apply(EqualElement{key, eq_ref()}, PolicyTraits::element(single_slot()))) { return {single_iterator(), false}; } else if constexpr (SooEnabled()) { @@ -3307,7 +3274,7 @@ class raw_hash_set { Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(PolicyTraits::apply( - EqualElement{key, eq_ref()}, + EqualElement{key, eq_ref()}, PolicyTraits::element(slot_array() + seq.offset(i))))) return {iterator_at(seq.offset(i)), false}; } @@ -3385,12 +3352,12 @@ class raw_hash_set { const auto assert_consistent = [&](const ctrl_t*, void* slot) { const value_type& element = PolicyTraits::element(static_cast(slot)); - const bool is_key_equal = - PolicyTraits::apply(EqualElement{key, eq_ref()}, element); + const bool is_key_equal = PolicyTraits::apply( + EqualElement{key, eq_ref()}, element); if (!is_key_equal) return; const size_t hash_of_slot = - PolicyTraits::apply(HashElement{hash_ref()}, element); + PolicyTraits::apply(HashElement{hash_ref()}, element); ABSL_ATTRIBUTE_UNUSED const bool is_hash_equal = hash_of_arg == hash_of_slot; assert((!is_key_equal || is_hash_equal) && @@ -3750,7 +3717,7 @@ struct HashtableDebugAccess> { container_internal::Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { if (Traits::apply( - typename Set::template EqualElement{ + EqualElement{ key, set.eq_ref()}, Traits::element(set.slot_array() + seq.offset(i)))) return num_probes; From f0835ec75b44a6cc8959f582b99b4a6a0b4de0b3 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Tue, 20 May 2025 04:28:56 -0700 Subject: [PATCH 040/107] Extract the entire PrepareInsert to Small non SOO table out of the line. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also introduced specialized `Grow1To3AndPrepareInsert`. I have moved generation of seed into that function, because we do not need a seed for a small table (capacity = 1). 
That eliminates the cost of seed generation for tables that never grew above size 1. At the moment code in `Grow1To3AndPrepareInsert` has a lot of similarities to `GrowSooTableToNextCapacityAndPrepareInsert`. But there are already differences that are not easy to generalize. Implementations would likely/definitely diverge more in the future. ``` name old INSTRUCTIONS/op new INSTRUCTIONS/op delta BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 39.1 ± 0% 39.1 ± 0% ~ (all samples are equal) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 116 ± 0% 116 ± 0% ~ (p=0.732 n=51+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4 147 ± 0% 145 ± 0% -1.19% (p=0.000 n=51+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8 135 ± 0% 133 ± 0% -1.30% (p=0.000 n=51+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16 139 ± 0% 138 ± 0% -0.72% (p=0.000 n=52+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32 134 ± 0% 134 ± 0% -0.42% (p=0.000 n=51+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64 128 ± 0% 128 ± 0% -0.24% (p=0.000 n=52+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128 123 ± 0% 122 ± 0% -0.14% (p=0.000 n=48+49) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:256 119 ± 0% 119 ± 0% -0.08% (p=0.000 n=52+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:512 116 ± 0% 116 ± 0% -0.04% (p=0.000 n=51+46) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1024 115 ± 0% 115 ± 0% -0.02% (p=0.000 n=50+50) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2048 113 ± 0% 113 ± 0% -0.01% (p=0.000 n=46+50) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4096 113 ± 0% 113 ± 0% -0.01% (p=0.000 n=50+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8192 113 ± 0% 113 ± 0% -0.01% 
(p=0.000 n=48+46) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16384 112 ± 0% 112 ± 0% -0.00% (p=0.000 n=46+44) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32768 112 ± 0% 112 ± 0% ~ (p=0.255 n=53+54) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:65536 112 ± 0% 112 ± 0% ~ (p=0.625 n=52+50) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:131072 112 ± 0% 112 ± 0% ~ (p=0.088 n=55+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:262144 112 ± 0% 112 ± 0% ~ (p=0.875 n=50+45) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:524288 112 ± 0% 112 ± 0% +0.00% (p=0.000 n=47+43) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1048576 112 ± 0% 112 ± 0% +0.00% (p=0.049 n=47+49) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:1 189 ± 0% 180 ± 0% -4.76% (p=0.000 n=56+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:2 237 ± 0% 228 ± 0% -4.01% (p=0.000 n=52+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:4 221 ± 0% 215 ± 0% -2.93% (p=0.000 n=56+43) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:8 186 ± 0% 182 ± 0% -2.22% (p=0.000 n=52+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:16 175 ± 0% 173 ± 0% -1.25% (p=0.000 n=52+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:32 162 ± 0% 161 ± 0% -0.71% (p=0.000 n=53+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:64 152 ± 0% 151 ± 0% -0.40% (p=0.000 n=49+49) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:128 144 ± 0% 144 ± 0% -0.22% (p=0.000 n=49+49) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:256 140 ± 0% 139 ± 0% -0.12% (p=0.000 n=53+50) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:512 136 ± 0% 136 ± 0% -0.06% (p=0.000 n=50+50) 
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:1024 134 ± 0% 134 ± 0% -0.03% (p=0.000 n=49+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:2048 134 ± 0% 134 ± 0% -0.06% (p=0.000 n=52+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:4096 133 ± 0% 133 ± 0% -0.01% (p=0.045 n=53+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:8192 133 ± 0% 133 ± 0% ~ (p=0.119 n=56+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:16384 132 ± 0% 132 ± 0% ~ (p=0.843 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:32768 132 ± 0% 132 ± 0% ~ (p=0.209 n=57+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:65536 132 ± 0% 132 ± 0% +0.02% (p=0.047 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:131072 131 ± 0% 131 ± 0% ~ (p=0.642 n=51+54) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:262144 131 ± 0% 131 ± 0% ~ (p=0.217 n=44+49) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:524288 131 ± 0% 131 ± 0% -0.01% (p=0.002 n=57+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:1048576 131 ± 0% 131 ± 0% +0.01% (p=0.000 n=57+44) name old CYCLES/op new CYCLES/op delta BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 14.2 ± 0% 14.2 ± 0% -0.35% (p=0.000 n=54+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 33.3 ± 0% 34.4 ± 0% +3.33% (p=0.000 n=42+46) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4 47.4 ± 0% 47.0 ± 0% -0.89% (p=0.000 n=49+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8 46.4 ± 0% 46.0 ± 0% -0.90% (p=0.000 n=48+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16 50.0 ± 1% 49.2 ± 1% -1.65% (p=0.000 n=53+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32 50.6 ± 1% 50.1 ± 1% 
-0.96% (p=0.000 n=54+54) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64 50.9 ± 0% 50.9 ± 0% -0.19% (p=0.000 n=55+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128 49.4 ± 1% 49.5 ± 1% +0.16% (p=0.000 n=54+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:256 47.6 ± 2% 47.9 ± 2% +0.80% (p=0.000 n=51+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:512 45.4 ± 2% 45.6 ± 2% +0.48% (p=0.000 n=53+54) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1024 44.1 ± 1% 44.3 ± 1% +0.49% (p=0.000 n=49+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2048 43.2 ± 1% 43.3 ± 1% +0.24% (p=0.001 n=54+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4096 42.7 ± 1% 42.8 ± 1% +0.23% (p=0.000 n=54+54) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8192 42.7 ± 0% 42.7 ± 1% +0.14% (p=0.003 n=55+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16384 43.0 ± 0% 43.0 ± 1% +0.07% (p=0.044 n=57+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32768 43.5 ± 0% 43.6 ± 0% +0.13% (p=0.000 n=53+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:65536 44.3 ± 1% 44.4 ± 1% +0.12% (p=0.001 n=55+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:131072 45.3 ± 1% 45.3 ± 1% +0.15% (p=0.004 n=56+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:262144 46.8 ± 2% 47.0 ± 2% +0.56% (p=0.012 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:524288 49.5 ± 1% 49.6 ± 1% ~ (p=0.061 n=49+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1048576 53.7 ± 2% 53.7 ± 2% ~ (p=0.538 n=55+51) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:1 53.8 ± 0% 52.2 ± 0% -2.93% (p=0.000 n=52+53) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:2 
68.2 ± 0% 65.3 ± 0% -4.26% (p=0.000 n=52+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:4 65.3 ± 2% 64.3 ± 3% -1.65% (p=0.000 n=56+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:8 59.5 ± 4% 58.3 ± 2% -1.93% (p=0.000 n=57+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:16 59.7 ± 2% 59.1 ± 1% -0.93% (p=0.000 n=52+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:32 58.5 ± 2% 58.2 ± 1% -0.47% (p=0.000 n=50+46) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:64 59.0 ± 3% 58.2 ± 2% -1.29% (p=0.000 n=57+52) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:128 57.9 ± 2% 57.3 ± 2% -1.05% (p=0.000 n=54+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:256 61.8 ± 3% 61.3 ± 3% -0.74% (p=0.000 n=56+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:512 60.6 ± 2% 60.2 ± 2% -0.68% (p=0.000 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:1024 59.9 ± 3% 59.5 ± 3% -0.58% (p=0.000 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:2048 60.0 ± 3% 59.7 ± 4% -0.56% (p=0.000 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:4096 65.3 ± 3% 65.1 ± 4% -0.40% (p=0.005 n=57+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:8192 74.8 ± 3% 74.4 ± 4% -0.55% (p=0.042 n=55+56) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:16384 79.5 ± 2% 79.4 ± 3% ~ (p=0.198 n=56+57) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:32768 83.4 ± 2% 83.3 ± 2% ~ (p=0.683 n=57+49) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:65536 86.6 ± 2% 86.8 ± 3% ~ (p=0.230 n=55+54) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:131072 98.2 ±10% 97.5 ± 7% ~ (p=0.575 n=56+50) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 
64>/set_size:262144 157 ± 6% 156 ± 4% ~ (p=0.081 n=52+50) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:524288 172 ± 7% 173 ± 7% ~ (p=0.561 n=57+55) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 64>/set_size:1048576 185 ± 4% 185 ± 7% ~ (p=0.724 n=57+56) ``` PiperOrigin-RevId: 761022848 Change-Id: Ia9f71fa1f32a5549696662dd13195775a2746beb --- absl/container/internal/raw_hash_set.cc | 148 ++++++++++++++++-------- absl/container/internal/raw_hash_set.h | 75 ++++++------ 2 files changed, 139 insertions(+), 84 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index d805f71dabe..3316b970252 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include "absl/base/attributes.h" #include "absl/base/config.h" @@ -1225,20 +1226,71 @@ void IncrementSmallSize(CommonFields& common, } } -} // namespace +std::pair Grow1To3AndPrepareInsert( + CommonFields& common, const PolicyFunctions& __restrict policy, + absl::FunctionRef get_hash) { + // TODO(b/413062340): Refactor to reuse more code with + // GrowSooTableToNextCapacityAndPrepareInsert. 
+ ABSL_SWISSTABLE_ASSERT(common.capacity() == 1); + ABSL_SWISSTABLE_ASSERT(!common.empty()); + ABSL_SWISSTABLE_ASSERT(!policy.soo_enabled); + constexpr size_t kOldCapacity = 1; + constexpr size_t kNewCapacity = NextCapacity(kOldCapacity); + ctrl_t* old_ctrl = common.control(); + void* old_slots = common.slot_array(); + + common.set_capacity(kNewCapacity); + const size_t slot_size = policy.slot_size; + const size_t slot_align = policy.slot_align; + void* alloc = policy.get_char_alloc(common); + HashtablezInfoHandle infoz = common.infoz(); + const bool has_infoz = infoz.IsSampled(); + + const auto [new_ctrl, new_slots] = + AllocBackingArray(common, policy, kNewCapacity, has_infoz, alloc); + common.set_control(new_ctrl); + common.set_slots(new_slots); + SanitizerPoisonMemoryRegion(new_slots, kNewCapacity * slot_size); + const size_t new_hash = get_hash(); + h2_t new_h2 = H2(new_hash); + size_t orig_hash = policy.hash_slot(policy.hash_fn(common), old_slots); + size_t offset = Resize1To3NewOffset(new_hash, common.seed()); + InitializeThreeElementsControlBytes(H2(orig_hash), new_h2, offset, new_ctrl); + + void* old_element_target = NextSlot(new_slots, slot_size); + SanitizerUnpoisonMemoryRegion(old_element_target, slot_size); + policy.transfer_n(&common, old_element_target, old_slots, 1); + + void* new_element_target_slot = SlotAddress(new_slots, offset, slot_size); + SanitizerUnpoisonMemoryRegion(new_element_target_slot, slot_size); + + policy.dealloc(alloc, kOldCapacity, old_ctrl, slot_size, slot_align, + has_infoz); + PrepareInsertCommon(common); + GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(1); + + if (ABSL_PREDICT_FALSE(has_infoz)) { + common.set_has_infoz(); + infoz.RecordStorageChanged(common.size() - 1, kNewCapacity); + infoz.RecordRehash(0); + infoz.RecordInsert(new_hash, 0); + common.set_infoz(infoz); + } + return {new_ctrl + offset, new_element_target_slot}; +} + +// Grows to next capacity and prepares insert for the given new_hash. 
+// Returns the offset of the new element. size_t GrowToNextCapacityAndPrepareInsert( CommonFields& common, const PolicyFunctions& __restrict policy, size_t new_hash) { ABSL_SWISSTABLE_ASSERT(common.growth_left() == 0); const size_t old_capacity = common.capacity(); ABSL_SWISSTABLE_ASSERT(old_capacity > policy.soo_capacity()); + ABSL_SWISSTABLE_ASSERT(!IsSmallCapacity(old_capacity)); const size_t new_capacity = NextCapacity(old_capacity); - ABSL_SWISSTABLE_ASSERT(IsValidCapacity(new_capacity)); - ABSL_SWISSTABLE_ASSERT(new_capacity > policy.soo_capacity()); - ABSL_SWISSTABLE_ASSERT(!IsSmallCapacity(new_capacity)); - ctrl_t* old_ctrl = common.control(); void* old_slots = common.slot_array(); @@ -1260,25 +1312,15 @@ size_t GrowToNextCapacityAndPrepareInsert( FindInfo find_info; if (ABSL_PREDICT_TRUE(is_single_group(new_capacity))) { size_t offset; - if (old_capacity == 1) { - size_t orig_hash = policy.hash_slot(policy.hash_fn(common), old_slots); - offset = Resize1To3NewOffset(new_hash, common.seed()); - InitializeThreeElementsControlBytes(H2(orig_hash), new_h2, offset, - new_ctrl); - void* target_slot = SlotAddress(new_slots, offset, slot_size); - SanitizerUnpoisonMemoryRegion(target_slot, slot_size); - } else { - GrowIntoSingleGroupShuffleControlBytes(old_ctrl, old_capacity, new_ctrl, - new_capacity); - // We put the new element either at the beginning or at the end of the - // table with approximately equal probability. - offset = SingleGroupTableH1(new_hash, common.seed()) & 1 - ? 0 - : new_capacity - 1; - - ABSL_SWISSTABLE_ASSERT(IsEmpty(new_ctrl[offset])); - SetCtrlInSingleGroupTable(common, offset, new_h2, policy.slot_size); - } + GrowIntoSingleGroupShuffleControlBytes(old_ctrl, old_capacity, new_ctrl, + new_capacity); + // We put the new element either at the beginning or at the end of the + // table with approximately equal probability. + offset = + SingleGroupTableH1(new_hash, common.seed()) & 1 ? 
0 : new_capacity - 1; + + ABSL_SWISSTABLE_ASSERT(IsEmpty(new_ctrl[offset])); + SetCtrlInSingleGroupTable(common, offset, new_h2, policy.slot_size); find_info = FindInfo{offset, 0}; // Single group tables have all slots full on resize. So we can transfer // all slots without checking the control bytes. @@ -1309,16 +1351,24 @@ size_t GrowToNextCapacityAndPrepareInsert( return find_info.offset; } -void SmallEmptyNonSooPrepareInsert(CommonFields& common, - const PolicyFunctions& __restrict policy, - absl::FunctionRef get_hash) { +} // namespace + +std::pair SmallNonSooPrepareInsert( + CommonFields& common, const PolicyFunctions& __restrict policy, + absl::FunctionRef get_hash) { ABSL_SWISSTABLE_ASSERT(common.is_small()); ABSL_SWISSTABLE_ASSERT(!policy.soo_enabled); if (common.capacity() == 1) { - IncrementSmallSize(common, policy); - return; + if (common.empty()) { + IncrementSmallSize(common, policy); + return {SooControl(), common.slot_array()}; + } else { + return Grow1To3AndPrepareInsert(common, policy, get_hash); + } } + // Growing from 0 to 1 capacity. + ABSL_SWISSTABLE_ASSERT(common.capacity() == 0); constexpr size_t kNewCapacity = 1; common.set_capacity(kNewCapacity); @@ -1334,7 +1384,8 @@ void SmallEmptyNonSooPrepareInsert(CommonFields& common, const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, kNewCapacity, has_infoz, alloc); - common.set_control(new_ctrl); + // In small tables seed is not needed. + common.set_control(new_ctrl); common.set_slots(new_slots); static_assert(NextCapacity(0) == 1); @@ -1344,14 +1395,16 @@ void SmallEmptyNonSooPrepareInsert(CommonFields& common, // worth it. GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(0); - if (ABSL_PREDICT_TRUE(!has_infoz)) return; - // TODO(b/413062340): we could potentially store infoz in place of the control - // pointer for the capacity 1 case. 
- common.set_has_infoz(); - infoz.RecordStorageChanged(/*size=*/0, kNewCapacity); - infoz.RecordRehash(/*total_probe_length=*/0); - infoz.RecordInsert(get_hash(), /*distance_from_desired=*/0); - common.set_infoz(infoz); + if (ABSL_PREDICT_FALSE(has_infoz)) { + // TODO(b/413062340): we could potentially store infoz in place of the + // control pointer for the capacity 1 case. + common.set_has_infoz(); + infoz.RecordStorageChanged(/*size=*/0, kNewCapacity); + infoz.RecordRehash(/*total_probe_length=*/0); + infoz.RecordInsert(get_hash(), /*distance_from_desired=*/0); + common.set_infoz(infoz); + } + return {SooControl(), new_slots}; } namespace { @@ -1504,6 +1557,17 @@ ABSL_ATTRIBUTE_NOINLINE void ReserveAllocatedTable( common.infoz().RecordReservation(new_size); } +// As `ResizeFullSooTableToNextCapacity`, except that we also force the SOO +// table to be sampled. SOO tables need to switch from SOO to heap in order to +// store the infoz. No-op if sampling is disabled or not possible. +void GrowFullSooTableToNextCapacityForceSampling( + CommonFields& common, const PolicyFunctions& __restrict policy) { + AssertFullSoo(common, policy); + ResizeFullSooTable( + common, policy, NextCapacity(SooCapacity()), + ResizeFullSooTableSamplingMode::kForceSampleNoResizeIfUnsampled); +} + } // namespace void* GetRefForEmptyClass(CommonFields& common) { @@ -1594,14 +1658,6 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert( return offset; } -void GrowFullSooTableToNextCapacityForceSampling( - CommonFields& common, const PolicyFunctions& __restrict policy) { - AssertFullSoo(common, policy); - ResizeFullSooTable( - common, policy, NextCapacity(SooCapacity()), - ResizeFullSooTableSamplingMode::kForceSampleNoResizeIfUnsampled); -} - void Rehash(CommonFields& common, const PolicyFunctions& __restrict policy, size_t n) { const size_t cap = common.capacity(); diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index fba69f47b48..4d3f713800b 100644 
--- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1792,23 +1792,13 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, size_t new_hash, ctrl_t soo_slot_ctrl); -// As `ResizeFullSooTableToNextCapacity`, except that we also force the SOO -// table to be sampled. SOO tables need to switch from SOO to heap in order to -// store the infoz. No-op if sampling is disabled or not possible. -void GrowFullSooTableToNextCapacityForceSampling(CommonFields& common, - const PolicyFunctions& policy); - -// Grows to next capacity and prepares insert for the given new_hash. -// Returns the offset of the new element. -size_t GrowToNextCapacityAndPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - size_t new_hash); - -// When growing from capacity 0 to 1, we only need the hash if the table ends up -// being sampled so don't compute it unless needed. -void SmallEmptyNonSooPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - absl::FunctionRef get_hash); +// PrepareInsert for small tables (is_small()==true). +// Returns the new control and the new slot. +// Hash is only computed if the table is sampled or grew to large size +// (is_small()==false). +std::pair SmallNonSooPrepareInsert( + CommonFields& common, const PolicyFunctions& policy, + absl::FunctionRef get_hash); // Resizes table with allocated slots and change the table seed. // Tables with SOO enabled must have capacity > policy.soo_capacity. 
@@ -3221,15 +3211,10 @@ class raw_hash_set { } template - std::pair find_or_prepare_insert_small(const K& key) { - ABSL_SWISSTABLE_ASSERT(is_small()); - [[maybe_unused]] ctrl_t soo_slot_ctrl; + std::pair find_or_prepare_insert_soo(const K& key) { + ABSL_SWISSTABLE_ASSERT(is_soo()); + ctrl_t soo_slot_ctrl; if (empty()) { - if (!SooEnabled()) { - SmallEmptyNonSooPrepareInsert(common(), GetPolicyFunctions(), - HashKey{hash_ref(), key}); - return {single_iterator(), true}; - } if (!should_sample_soo()) { common().set_full_soo(); return {single_iterator(), true}; @@ -3238,25 +3223,35 @@ class raw_hash_set { } else if (PolicyTraits::apply(EqualElement{key, eq_ref()}, PolicyTraits::element(single_slot()))) { return {single_iterator(), false}; - } else if constexpr (SooEnabled()) { + } else { soo_slot_ctrl = static_cast(H2(hash_of(single_slot()))); } ABSL_SWISSTABLE_ASSERT(capacity() == 1); const size_t hash = hash_of(key); - size_t index; + constexpr bool kUseMemcpy = + PolicyTraits::transfer_uses_memcpy() && SooEnabled(); + size_t index = GrowSooTableToNextCapacityAndPrepareInsert< + kUseMemcpy ? OptimalMemcpySizeForSooSlotTransfer(sizeof(slot_type)) : 0, + kUseMemcpy>(common(), GetPolicyFunctions(), hash, soo_slot_ctrl); + return {iterator_at(index), true}; + } + + template + std::pair find_or_prepare_insert_small(const K& key) { + ABSL_SWISSTABLE_ASSERT(is_small()); if constexpr (SooEnabled()) { - constexpr bool kUseMemcpy = - PolicyTraits::transfer_uses_memcpy() && SooEnabled(); - index = GrowSooTableToNextCapacityAndPrepareInsert< - kUseMemcpy ? OptimalMemcpySizeForSooSlotTransfer(sizeof(slot_type)) - : 0, - kUseMemcpy>(common(), GetPolicyFunctions(), hash, soo_slot_ctrl); - } else { - // TODO(b/413062340): add specialized function for growing from 1 to 3. 
- index = GrowToNextCapacityAndPrepareInsert(common(), GetPolicyFunctions(), - hash); + return find_or_prepare_insert_soo(key); } - return {iterator_at(index), true}; + if (!empty()) { + if (PolicyTraits::apply(EqualElement{key, eq_ref()}, + PolicyTraits::element(single_slot()))) { + return {single_iterator(), false}; + } + } + return {iterator_at_ptr( + SmallNonSooPrepareInsert(common(), GetPolicyFunctions(), + HashKey{hash_ref(), key})), + true}; } template @@ -3409,6 +3404,10 @@ class raw_hash_set { const_iterator iterator_at(size_t i) const ABSL_ATTRIBUTE_LIFETIME_BOUND { return const_cast(this)->iterator_at(i); } + iterator iterator_at_ptr(std::pair ptrs) + ABSL_ATTRIBUTE_LIFETIME_BOUND { + return {ptrs.first, to_slot(ptrs.second), common().generation_ptr()}; + } reference unchecked_deref(iterator it) { return it.unchecked_deref(); } From 05ba922c1263c2879a1840e60d7519bfd2a4bf27 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Tue, 20 May 2025 06:14:25 -0700 Subject: [PATCH 041/107] Small cleanup of `infoz` processing to get the logic out of the line or removed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. For SOO growth from 1 to 3: remove recording since table couldn't be sampled in full SOO state. 2. For non-SOO growth for 0 to 1 and from 1 to 3: move reporting out of the line and reuse the code. Microbenchmarks show some wins for SOO case. 
``` name old CYCLES/op new CYCLES/op delta BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1 14.2 ± 0% 14.2 ± 0% -0.15% (p=0.000 n=51+47) BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2 34.4 ± 0% 32.4 ± 0% -5.83% (p=0.000 n=51+54) ``` PiperOrigin-RevId: 761052883 Change-Id: Icfe1503e5234af6f1b61fee12078d0e0cebbfcac --- absl/container/internal/raw_hash_set.cc | 74 +++++++++++++++++-------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 3316b970252..ddbf170c1e2 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -601,6 +601,45 @@ size_t FindNewPositionsAndTransferSlots( return total_probe_length; } +void ReportGrowthToInfozImpl(CommonFields& common, HashtablezInfoHandle infoz, + size_t hash, size_t total_probe_length, + size_t distance_from_desired) { + ABSL_SWISSTABLE_ASSERT(infoz.IsSampled()); + infoz.RecordStorageChanged(common.size() - 1, common.capacity()); + infoz.RecordRehash(total_probe_length); + infoz.RecordInsert(hash, distance_from_desired); + common.set_has_infoz(); + // TODO(b/413062340): we could potentially store infoz in place of the + // control pointer for the capacity 1 case. + common.set_infoz(infoz); +} + +// Specialization to avoid passing two 0s from hot function. 
+ABSL_ATTRIBUTE_NOINLINE void ReportSingleGroupTableGrowthToInfoz( + CommonFields& common, HashtablezInfoHandle infoz, size_t hash) { + ReportGrowthToInfozImpl(common, infoz, hash, /*total_probe_length=*/0, + /*distance_from_desired=*/0); +} + +ABSL_ATTRIBUTE_NOINLINE void ReportGrowthToInfoz(CommonFields& common, + HashtablezInfoHandle infoz, + size_t hash, + size_t total_probe_length, + size_t distance_from_desired) { + ReportGrowthToInfozImpl(common, infoz, hash, total_probe_length, + distance_from_desired); +} + +ABSL_ATTRIBUTE_NOINLINE void ReportResizeToInfoz(CommonFields& common, + HashtablezInfoHandle infoz, + size_t total_probe_length) { + ABSL_SWISSTABLE_ASSERT(infoz.IsSampled()); + infoz.RecordStorageChanged(common.size(), common.capacity()); + infoz.RecordRehash(total_probe_length); + common.set_has_infoz(); + common.set_infoz(infoz); +} + struct BackingArrayPtrs { ctrl_t* ctrl; void* slots; @@ -662,11 +701,8 @@ void ResizeNonSooImpl(CommonFields& common, CapacityToGrowth(new_capacity)); } - if (has_infoz) { - common.set_has_infoz(); - infoz.RecordStorageChanged(common.size(), new_capacity); - infoz.RecordRehash(total_probe_length); - common.set_infoz(infoz); + if (ABSL_PREDICT_FALSE(has_infoz)) { + ReportResizeToInfoz(common, infoz, total_probe_length); } } @@ -1268,14 +1304,11 @@ std::pair Grow1To3AndPrepareInsert( policy.dealloc(alloc, kOldCapacity, old_ctrl, slot_size, slot_align, has_infoz); PrepareInsertCommon(common); - GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(1); + ABSL_SWISSTABLE_ASSERT(common.size() == 2); + GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(kNewCapacity - 2); if (ABSL_PREDICT_FALSE(has_infoz)) { - common.set_has_infoz(); - infoz.RecordStorageChanged(common.size() - 1, kNewCapacity); - infoz.RecordRehash(0); - infoz.RecordInsert(new_hash, 0); - common.set_infoz(infoz); + ReportSingleGroupTableGrowthToInfoz(common, infoz, new_hash); } return {new_ctrl + offset, new_element_target_slot}; } @@ -1342,11 
+1375,8 @@ size_t GrowToNextCapacityAndPrepareInsert( common.size()); if (ABSL_PREDICT_FALSE(has_infoz)) { - common.set_has_infoz(); - infoz.RecordStorageChanged(common.size() - 1, new_capacity); - infoz.RecordRehash(total_probe_length); - infoz.RecordInsert(new_hash, find_info.probe_length); - common.set_infoz(infoz); + ReportGrowthToInfoz(common, infoz, new_hash, total_probe_length, + find_info.probe_length); } return find_info.offset; } @@ -1396,13 +1426,7 @@ std::pair SmallNonSooPrepareInsert( GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(0); if (ABSL_PREDICT_FALSE(has_infoz)) { - // TODO(b/413062340): we could potentially store infoz in place of the - // control pointer for the capacity 1 case. - common.set_has_infoz(); - infoz.RecordStorageChanged(/*size=*/0, kNewCapacity); - infoz.RecordRehash(/*total_probe_length=*/0); - infoz.RecordInsert(get_hash(), /*distance_from_desired=*/0); - common.set_infoz(infoz); + ReportSingleGroupTableGrowthToInfoz(common, infoz, get_hash()); } return {SooControl(), new_slots}; } @@ -1652,7 +1676,9 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert( common.set_control(new_ctrl); common.set_slots(new_slots); - common.infoz().RecordInsert(new_hash, /*distance_from_desired=*/0); + // Full SOO table couldn't be sampled. If SOO table is sampled, it would + // have been resized to the next capacity. + ABSL_SWISSTABLE_ASSERT(!common.infoz().IsSampled()); SanitizerUnpoisonMemoryRegion(SlotAddress(new_slots, offset, slot_size), slot_size); return offset; From 0c0879586e6369673ed42253a32dbec13e5f44b4 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 20 May 2025 08:23:21 -0700 Subject: [PATCH 042/107] Import of CCTZ from GitHub. 
PiperOrigin-RevId: 761092464 Change-Id: I8497e9c9a034d222e48815732fd5f9b1103b7d51 --- .../internal/cctz/include/cctz/civil_time_detail.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/absl/time/internal/cctz/include/cctz/civil_time_detail.h b/absl/time/internal/cctz/include/cctz/civil_time_detail.h index 2b0aed56c38..fe3b8bdf822 100644 --- a/absl/time/internal/cctz/include/cctz/civil_time_detail.h +++ b/absl/time/internal/cctz/include/cctz/civil_time_detail.h @@ -96,6 +96,18 @@ CONSTEXPR_F int days_per_4years(int yi) noexcept { CONSTEXPR_F int days_per_year(year_t y, month_t m) noexcept { return is_leap_year(y + (m > 2)) ? 366 : 365; } +// The compiler cannot optimize away the check if we use +// -fsanitize=array-bounds. +// m is guaranteed to be in [1:12] in the caller, but the compiler cannot +// optimize away the check even when this function is inlined into BreakTime. +// To reduce the overhead, we use no_sanitize to skip the unnecessary +// -fsanitize=array-bounds check. Remove no_sanitize once the missed +// optimization is fixed. +#if defined(__clang__) && defined(__has_cpp_attribute) +#if __has_cpp_attribute(clang::no_sanitize) +[[clang::no_sanitize("array-bounds")]] +#endif +#endif CONSTEXPR_F int days_per_month(year_t y, month_t m) noexcept { CONSTEXPR_D int k_days_per_month[1 + 12] = { -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 // non leap year From 5914831e6de7b5571fa9a0d8f507d4b1c801804b Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Tue, 20 May 2025 10:38:53 -0700 Subject: [PATCH 043/107] Do `common.increment_size()` directly in SmallNonSooPrepareInsert if inserting to reserved 1 element table. That avoids `policy.soo_enabled` branch and makes the function slightly smaller. Also removed update of unused `growth_info` in `IncrementSmallSize`. `IncrementSmallSize` is only used in `Copy` now. 
PiperOrigin-RevId: 761141482 Change-Id: I5f46a1a239faa5205b53c9bf1289ff742cb9cace --- absl/container/internal/raw_hash_set.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index ddbf170c1e2..5238c81a2df 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -1250,15 +1250,20 @@ size_t GrowToNextCapacityDispatch(CommonFields& common, } } +void IncrementSmallSizeNonSoo(CommonFields& common, + const PolicyFunctions& __restrict policy) { + ABSL_SWISSTABLE_ASSERT(common.is_small()); + common.increment_size(); + SanitizerUnpoisonMemoryRegion(common.slot_array(), policy.slot_size); +} + void IncrementSmallSize(CommonFields& common, const PolicyFunctions& __restrict policy) { ABSL_SWISSTABLE_ASSERT(common.is_small()); if (policy.soo_enabled) { common.set_full_soo(); } else { - common.increment_size(); - common.growth_info().OverwriteEmptyAsFull(); - SanitizerUnpoisonMemoryRegion(common.slot_array(), policy.slot_size); + IncrementSmallSizeNonSoo(common, policy); } } @@ -1390,7 +1395,7 @@ std::pair SmallNonSooPrepareInsert( ABSL_SWISSTABLE_ASSERT(!policy.soo_enabled); if (common.capacity() == 1) { if (common.empty()) { - IncrementSmallSize(common, policy); + IncrementSmallSizeNonSoo(common, policy); return {SooControl(), common.slot_array()}; } else { return Grow1To3AndPrepareInsert(common, policy, get_hash); From 282d0fcc089a27b8dc3904e86019371c492b9a1e Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Tue, 20 May 2025 12:15:01 -0700 Subject: [PATCH 044/107] Simplify calls to `EqualElement` by introducing `equal_to` helper function. I am marking one liner helper functions as `ABSL_ATTRIBUTE_ALWAYS_INLINE` to avoid them ending up as weak symbols for the linker. 
PiperOrigin-RevId: 761180450 Change-Id: I3593a5a9e8317df7714715608bc7309c3fcc8bbb --- absl/container/internal/raw_hash_set.h | 48 ++++++++++---------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 4d3f713800b..f9c9b0b7e3a 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -2992,11 +2992,7 @@ class raw_hash_set { template iterator find_small(const key_arg& key) { ABSL_SWISSTABLE_ASSERT(is_small()); - return empty() || !PolicyTraits::apply( - EqualElement{key, eq_ref()}, - PolicyTraits::element(single_slot())) - ? end() - : single_iterator(); + return empty() || !equal_to(key, single_slot()) ? end() : single_iterator(); } template @@ -3011,9 +3007,7 @@ class raw_hash_set { #endif Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { - if (ABSL_PREDICT_TRUE(PolicyTraits::apply( - EqualElement{key, eq_ref()}, - PolicyTraits::element(slot_array() + seq.offset(i))))) + if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return iterator_at(seq.offset(i)); } if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); @@ -3094,17 +3088,25 @@ class raw_hash_set { } template - size_t hash_of(const K& key) const { + ABSL_ATTRIBUTE_ALWAYS_INLINE bool equal_to(const K& key, + slot_type* slot) const { + return PolicyTraits::apply(EqualElement{key, eq_ref()}, + PolicyTraits::element(slot)); + } + template + ABSL_ATTRIBUTE_ALWAYS_INLINE size_t hash_of(const K& key) const { return HashElement{hash_ref()}(key); } - size_t hash_of(slot_type* slot) const { + ABSL_ATTRIBUTE_ALWAYS_INLINE size_t hash_of(slot_type* slot) const { return PolicyTraits::apply(HashElement{hash_ref()}, PolicyTraits::element(slot)); } // Casting directly from e.g. char* to slot_type* can cause compilation errors // on objective-C. This function converts to void* first, avoiding the issue. 
- static slot_type* to_slot(void* buf) { return static_cast(buf); } + static ABSL_ATTRIBUTE_ALWAYS_INLINE slot_type* to_slot(void* buf) { + return static_cast(buf); + } // Requires that lhs does not have a full SOO slot. static void move_common(bool rhs_is_full_soo, CharAlloc& rhs_alloc, @@ -3220,8 +3222,7 @@ class raw_hash_set { return {single_iterator(), true}; } soo_slot_ctrl = ctrl_t::kEmpty; - } else if (PolicyTraits::apply(EqualElement{key, eq_ref()}, - PolicyTraits::element(single_slot()))) { + } else if (equal_to(key, single_slot())) { return {single_iterator(), false}; } else { soo_slot_ctrl = static_cast(H2(hash_of(single_slot()))); @@ -3243,8 +3244,7 @@ class raw_hash_set { return find_or_prepare_insert_soo(key); } if (!empty()) { - if (PolicyTraits::apply(EqualElement{key, eq_ref()}, - PolicyTraits::element(single_slot()))) { + if (equal_to(key, single_slot())) { return {single_iterator(), false}; } } @@ -3268,9 +3268,7 @@ class raw_hash_set { #endif Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { - if (ABSL_PREDICT_TRUE(PolicyTraits::apply( - EqualElement{key, eq_ref()}, - PolicyTraits::element(slot_array() + seq.offset(i))))) + if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return {iterator_at(seq.offset(i)), false}; } auto mask_empty = g.MaskEmpty(); @@ -3345,16 +3343,11 @@ class raw_hash_set { const size_t hash_of_arg = hash_of(key); const auto assert_consistent = [&](const ctrl_t*, void* slot) { - const value_type& element = - PolicyTraits::element(static_cast(slot)); - const bool is_key_equal = PolicyTraits::apply( - EqualElement{key, eq_ref()}, element); + const bool is_key_equal = equal_to(key, to_slot(slot)); if (!is_key_equal) return; - const size_t hash_of_slot = - PolicyTraits::apply(HashElement{hash_ref()}, element); ABSL_ATTRIBUTE_UNUSED const bool is_hash_equal = - hash_of_arg == hash_of_slot; + hash_of_arg == hash_of(to_slot(slot)); assert((!is_key_equal || is_hash_equal) && "eq(k1, k2) must imply that 
hash(k1) == hash(k2). " "hash/eq functors are inconsistent."); @@ -3715,10 +3708,7 @@ struct HashtableDebugAccess> { while (true) { container_internal::Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { - if (Traits::apply( - EqualElement{ - key, set.eq_ref()}, - Traits::element(set.slot_array() + seq.offset(i)))) + if (set.equal_to(key, set.slot_array() + seq.offset(i))) return num_probes; ++num_probes; } From 4e94319de36fff9e7bda30e0538a0b9d158db459 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Wed, 21 May 2025 10:47:33 -0700 Subject: [PATCH 045/107] Move `hashtable_control_bytes` tests into their own file. PiperOrigin-RevId: 761584628 Change-Id: I39fd1b14ae71754e058e23305761a2e43d9e989a --- absl/container/BUILD.bazel | 13 + absl/container/CMakeLists.txt | 13 + .../internal/hashtable_control_bytes_test.cc | 256 ++++++++++++++++++ absl/container/internal/raw_hash_set_test.cc | 220 --------------- 4 files changed, 282 insertions(+), 220 deletions(-) create mode 100644 absl/container/internal/hashtable_control_bytes_test.cc diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index 34ac288f18a..9a79523761c 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -698,6 +698,19 @@ cc_library( ], ) +cc_test( + name = "hashtable_control_bytes_test", + srcs = ["internal/hashtable_control_bytes_test.cc"], + copts = ABSL_TEST_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":hashtable_control_bytes", + "//absl/base:config", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) + cc_library( name = "raw_hash_set_resize_impl", hdrs = ["internal/raw_hash_set_resize_impl.h"], diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt index edc4a827099..b1c3ffac750 100644 --- a/absl/container/CMakeLists.txt +++ b/absl/container/CMakeLists.txt @@ -762,6 +762,19 @@ absl_cc_library( absl::endian ) +absl_cc_test( + NAME + hashtable_control_bytes_test + SRCS + 
"internal/hashtable_control_bytes_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::hashtable_control_bytes + GTest::gmock_main +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME diff --git a/absl/container/internal/hashtable_control_bytes_test.cc b/absl/container/internal/hashtable_control_bytes_test.cc new file mode 100644 index 00000000000..11d8f54a511 --- /dev/null +++ b/absl/container/internal/hashtable_control_bytes_test.cc @@ -0,0 +1,256 @@ +// Copyright 2025 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/container/internal/hashtable_control_bytes.h" + +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace container_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +// Convenience function to static cast to ctrl_t. 
+ctrl_t CtrlT(int i) { return static_cast(i); } + +TEST(BitMask, Smoke) { + EXPECT_FALSE((BitMask(0))); + EXPECT_TRUE((BitMask(5))); + + EXPECT_THAT((BitMask(0)), ElementsAre()); + EXPECT_THAT((BitMask(0x1)), ElementsAre(0)); + EXPECT_THAT((BitMask(0x2)), ElementsAre(1)); + EXPECT_THAT((BitMask(0x3)), ElementsAre(0, 1)); + EXPECT_THAT((BitMask(0x4)), ElementsAre(2)); + EXPECT_THAT((BitMask(0x5)), ElementsAre(0, 2)); + EXPECT_THAT((BitMask(0x55)), ElementsAre(0, 2, 4, 6)); + EXPECT_THAT((BitMask(0xAA)), ElementsAre(1, 3, 5, 7)); +} + +TEST(BitMask, WithShift_MatchPortable) { + // See the non-SSE version of Group for details on what this math is for. + uint64_t ctrl = 0x1716151413121110; + uint64_t hash = 0x12; + constexpr uint64_t lsbs = 0x0101010101010101ULL; + auto x = ctrl ^ (lsbs * hash); + uint64_t mask = (x - lsbs) & ~x & kMsbs8Bytes; + EXPECT_EQ(0x0000000080800000, mask); + + BitMask b(mask); + EXPECT_EQ(*b, 2); +} + +constexpr uint64_t kSome8BytesMask = /* */ 0x8000808080008000ULL; +constexpr uint64_t kSome8BytesMaskAllOnes = 0xff00ffffff00ff00ULL; +constexpr auto kSome8BytesMaskBits = std::array{1, 3, 4, 5, 7}; + +TEST(BitMask, WithShift_FullMask) { + EXPECT_THAT((BitMask(kMsbs8Bytes)), + ElementsAre(0, 1, 2, 3, 4, 5, 6, 7)); + EXPECT_THAT( + (BitMask(kMsbs8Bytes)), + ElementsAre(0, 1, 2, 3, 4, 5, 6, 7)); + EXPECT_THAT( + (BitMask(~uint64_t{0})), + ElementsAre(0, 1, 2, 3, 4, 5, 6, 7)); +} + +TEST(BitMask, WithShift_EmptyMask) { + EXPECT_THAT((BitMask(0)), ElementsAre()); + EXPECT_THAT((BitMask(0)), + ElementsAre()); +} + +TEST(BitMask, WithShift_SomeMask) { + EXPECT_THAT((BitMask(kSome8BytesMask)), + ElementsAreArray(kSome8BytesMaskBits)); + EXPECT_THAT((BitMask( + kSome8BytesMask)), + ElementsAreArray(kSome8BytesMaskBits)); + EXPECT_THAT((BitMask( + kSome8BytesMaskAllOnes)), + ElementsAreArray(kSome8BytesMaskBits)); +} + +TEST(BitMask, WithShift_SomeMaskExtraBitsForNullify) { + // Verify that adding extra bits into non zero bytes is fine. 
+ uint64_t extra_bits = 77; + for (int i = 0; i < 100; ++i) { + // Add extra bits, but keep zero bytes untouched. + uint64_t extra_mask = extra_bits & kSome8BytesMaskAllOnes; + EXPECT_THAT((BitMask( + kSome8BytesMask | extra_mask)), + ElementsAreArray(kSome8BytesMaskBits)) + << i << " " << extra_mask; + extra_bits = (extra_bits + 1) * 3; + } +} + +TEST(BitMask, LeadingTrailing) { + EXPECT_EQ((BitMask(0x00001a40).LeadingZeros()), 3); + EXPECT_EQ((BitMask(0x00001a40).TrailingZeros()), 6); + + EXPECT_EQ((BitMask(0x00000001).LeadingZeros()), 15); + EXPECT_EQ((BitMask(0x00000001).TrailingZeros()), 0); + + EXPECT_EQ((BitMask(0x00008000).LeadingZeros()), 0); + EXPECT_EQ((BitMask(0x00008000).TrailingZeros()), 15); + + EXPECT_EQ((BitMask(0x0000008080808000).LeadingZeros()), 3); + EXPECT_EQ((BitMask(0x0000008080808000).TrailingZeros()), 1); + + EXPECT_EQ((BitMask(0x0000000000000080).LeadingZeros()), 7); + EXPECT_EQ((BitMask(0x0000000000000080).TrailingZeros()), 0); + + EXPECT_EQ((BitMask(0x8000000000000000).LeadingZeros()), 0); + EXPECT_EQ((BitMask(0x8000000000000000).TrailingZeros()), 7); +} + +TEST(Group, Match) { + if (Group::kWidth == 16) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), + CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; + EXPECT_THAT(Group{group}.Match(0), ElementsAre()); + EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 11, 12, 13, 14, 15)); + EXPECT_THAT(Group{group}.Match(3), ElementsAre(3, 10)); + EXPECT_THAT(Group{group}.Match(5), ElementsAre(5, 9)); + EXPECT_THAT(Group{group}.Match(7), ElementsAre(7, 8)); + } else if (Group::kWidth == 8) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), CtrlT(1), + ctrl_t::kSentinel, CtrlT(1)}; + EXPECT_THAT(Group{group}.Match(0), ElementsAre()); + EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 5, 7)); + EXPECT_THAT(Group{group}.Match(2), ElementsAre(2, 
4)); + } else { + FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + } +} + +TEST(Group, MaskEmpty) { + if (Group::kWidth == 16) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), + CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskEmpty().LowestBitSet(), 0); + EXPECT_THAT(Group{group}.MaskEmpty().HighestBitSet(), 4); + } else if (Group::kWidth == 8) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), CtrlT(1), + ctrl_t::kSentinel, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskEmpty().LowestBitSet(), 0); + EXPECT_THAT(Group{group}.MaskEmpty().HighestBitSet(), 0); + } else { + FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + } +} + +TEST(Group, MaskFull) { + if (Group::kWidth == 16) { + ctrl_t group[] = { + ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), + CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskFull(), + ElementsAre(1, 3, 5, 7, 8, 9, 11, 12, 15)); + } else if (Group::kWidth == 8) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, + ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, + ctrl_t::kSentinel, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskFull(), ElementsAre(1, 4, 7)); + } else { + FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + } +} + +TEST(Group, MaskNonFull) { + if (Group::kWidth == 16) { + ctrl_t group[] = { + ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), + ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), + CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskNonFull(), + ElementsAre(0, 2, 4, 6, 10, 13, 14)); + } else if (Group::kWidth == 8) { + ctrl_t group[] = 
{ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, + ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, + ctrl_t::kSentinel, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskNonFull(), ElementsAre(0, 2, 3, 5, 6)); + } else { + FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + } +} + +TEST(Group, MaskEmptyOrDeleted) { + if (Group::kWidth == 16) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, CtrlT(3), + ctrl_t::kDeleted, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), + CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), + CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); + EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().HighestBitSet(), 4); + } else if (Group::kWidth == 8) { + ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), CtrlT(1), + ctrl_t::kSentinel, CtrlT(1)}; + EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); + EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().HighestBitSet(), 3); + } else { + FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + } +} + +TEST(Group, CountLeadingEmptyOrDeleted) { + const std::vector empty_examples = {ctrl_t::kEmpty, ctrl_t::kDeleted}; + const std::vector full_examples = { + CtrlT(0), CtrlT(1), CtrlT(2), CtrlT(3), + CtrlT(5), CtrlT(9), CtrlT(127), ctrl_t::kSentinel}; + + for (ctrl_t empty : empty_examples) { + std::vector e(Group::kWidth, empty); + EXPECT_EQ(Group::kWidth, Group{e.data()}.CountLeadingEmptyOrDeleted()); + for (ctrl_t full : full_examples) { + for (size_t i = 0; i != Group::kWidth; ++i) { + std::vector f(Group::kWidth, empty); + f[i] = full; + EXPECT_EQ(i, Group{f.data()}.CountLeadingEmptyOrDeleted()); + } + std::vector f(Group::kWidth, empty); + f[Group::kWidth * 2 / 3] = full; + f[Group::kWidth / 2] = full; + EXPECT_EQ(Group::kWidth / 2, + Group{f.data()}.CountLeadingEmptyOrDeleted()); + } + } +} + +} // namespace +} // namespace container_internal +ABSL_NAMESPACE_END +} // namespace absl diff 
--git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index 6da336000b5..f835b94ded9 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -54,8 +54,6 @@ #include "absl/container/internal/hash_function_defaults.h" #include "absl/container/internal/hash_policy_testing.h" #include "absl/random/random.h" -// TODO(b/382423690): Separate tests that depend only on -// hashtable_control_bytes. #include "absl/container/internal/hashtable_control_bytes.h" #include "absl/container/internal/hashtable_debug.h" #include "absl/container/internal/hashtablez_sampler.h" @@ -96,7 +94,6 @@ struct RawHashSetTestOnlyAccess { namespace { using ::testing::ElementsAre; -using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::Ge; using ::testing::Lt; @@ -329,199 +326,6 @@ TEST(Util, probe_seq) { EXPECT_THAT(offsets, ElementsAre(0, 16, 48, 96, 32, 112, 80, 64)); } -TEST(BitMask, Smoke) { - EXPECT_FALSE((BitMask(0))); - EXPECT_TRUE((BitMask(5))); - - EXPECT_THAT((BitMask(0)), ElementsAre()); - EXPECT_THAT((BitMask(0x1)), ElementsAre(0)); - EXPECT_THAT((BitMask(0x2)), ElementsAre(1)); - EXPECT_THAT((BitMask(0x3)), ElementsAre(0, 1)); - EXPECT_THAT((BitMask(0x4)), ElementsAre(2)); - EXPECT_THAT((BitMask(0x5)), ElementsAre(0, 2)); - EXPECT_THAT((BitMask(0x55)), ElementsAre(0, 2, 4, 6)); - EXPECT_THAT((BitMask(0xAA)), ElementsAre(1, 3, 5, 7)); -} - -TEST(BitMask, WithShift_MatchPortable) { - // See the non-SSE version of Group for details on what this math is for. 
- uint64_t ctrl = 0x1716151413121110; - uint64_t hash = 0x12; - constexpr uint64_t lsbs = 0x0101010101010101ULL; - auto x = ctrl ^ (lsbs * hash); - uint64_t mask = (x - lsbs) & ~x & kMsbs8Bytes; - EXPECT_EQ(0x0000000080800000, mask); - - BitMask b(mask); - EXPECT_EQ(*b, 2); -} - -constexpr uint64_t kSome8BytesMask = /* */ 0x8000808080008000ULL; -constexpr uint64_t kSome8BytesMaskAllOnes = 0xff00ffffff00ff00ULL; -constexpr auto kSome8BytesMaskBits = std::array{1, 3, 4, 5, 7}; - - -TEST(BitMask, WithShift_FullMask) { - EXPECT_THAT((BitMask(kMsbs8Bytes)), - ElementsAre(0, 1, 2, 3, 4, 5, 6, 7)); - EXPECT_THAT( - (BitMask(kMsbs8Bytes)), - ElementsAre(0, 1, 2, 3, 4, 5, 6, 7)); - EXPECT_THAT( - (BitMask(~uint64_t{0})), - ElementsAre(0, 1, 2, 3, 4, 5, 6, 7)); -} - -TEST(BitMask, WithShift_EmptyMask) { - EXPECT_THAT((BitMask(0)), ElementsAre()); - EXPECT_THAT((BitMask(0)), - ElementsAre()); -} - -TEST(BitMask, WithShift_SomeMask) { - EXPECT_THAT((BitMask(kSome8BytesMask)), - ElementsAreArray(kSome8BytesMaskBits)); - EXPECT_THAT((BitMask( - kSome8BytesMask)), - ElementsAreArray(kSome8BytesMaskBits)); - EXPECT_THAT((BitMask( - kSome8BytesMaskAllOnes)), - ElementsAreArray(kSome8BytesMaskBits)); -} - -TEST(BitMask, WithShift_SomeMaskExtraBitsForNullify) { - // Verify that adding extra bits into non zero bytes is fine. - uint64_t extra_bits = 77; - for (int i = 0; i < 100; ++i) { - // Add extra bits, but keep zero bytes untouched. 
- uint64_t extra_mask = extra_bits & kSome8BytesMaskAllOnes; - EXPECT_THAT((BitMask( - kSome8BytesMask | extra_mask)), - ElementsAreArray(kSome8BytesMaskBits)) - << i << " " << extra_mask; - extra_bits = (extra_bits + 1) * 3; - } -} - -TEST(BitMask, LeadingTrailing) { - EXPECT_EQ((BitMask(0x00001a40).LeadingZeros()), 3); - EXPECT_EQ((BitMask(0x00001a40).TrailingZeros()), 6); - - EXPECT_EQ((BitMask(0x00000001).LeadingZeros()), 15); - EXPECT_EQ((BitMask(0x00000001).TrailingZeros()), 0); - - EXPECT_EQ((BitMask(0x00008000).LeadingZeros()), 0); - EXPECT_EQ((BitMask(0x00008000).TrailingZeros()), 15); - - EXPECT_EQ((BitMask(0x0000008080808000).LeadingZeros()), 3); - EXPECT_EQ((BitMask(0x0000008080808000).TrailingZeros()), 1); - - EXPECT_EQ((BitMask(0x0000000000000080).LeadingZeros()), 7); - EXPECT_EQ((BitMask(0x0000000000000080).TrailingZeros()), 0); - - EXPECT_EQ((BitMask(0x8000000000000000).LeadingZeros()), 0); - EXPECT_EQ((BitMask(0x8000000000000000).TrailingZeros()), 7); -} - -TEST(Group, Match) { - if (Group::kWidth == 16) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), - ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), - CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), - CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; - EXPECT_THAT(Group{group}.Match(0), ElementsAre()); - EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 11, 12, 13, 14, 15)); - EXPECT_THAT(Group{group}.Match(3), ElementsAre(3, 10)); - EXPECT_THAT(Group{group}.Match(5), ElementsAre(5, 9)); - EXPECT_THAT(Group{group}.Match(7), ElementsAre(7, 8)); - } else if (Group::kWidth == 8) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), - ctrl_t::kDeleted, CtrlT(2), CtrlT(1), - ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.Match(0), ElementsAre()); - EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 5, 7)); - EXPECT_THAT(Group{group}.Match(2), ElementsAre(2, 4)); - } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; - } -} - -TEST(Group, MaskEmpty) 
{ - if (Group::kWidth == 16) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), - ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), - CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), - CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmpty().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmpty().HighestBitSet(), 4); - } else if (Group::kWidth == 8) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), - ctrl_t::kDeleted, CtrlT(2), CtrlT(1), - ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmpty().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmpty().HighestBitSet(), 0); - } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; - } -} - -TEST(Group, MaskFull) { - if (Group::kWidth == 16) { - ctrl_t group[] = { - ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), - ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), - CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), - CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskFull(), - ElementsAre(1, 3, 5, 7, 8, 9, 11, 12, 15)); - } else if (Group::kWidth == 8) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, - ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, - ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskFull(), ElementsAre(1, 4, 7)); - } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; - } -} - -TEST(Group, MaskNonFull) { - if (Group::kWidth == 16) { - ctrl_t group[] = { - ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), - ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), - CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), - CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskNonFull(), - ElementsAre(0, 2, 4, 6, 10, 13, 14)); - } else if (Group::kWidth == 8) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, - ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, - ctrl_t::kSentinel, 
CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskNonFull(), ElementsAre(0, 2, 3, 5, 6)); - } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; - } -} - -TEST(Group, MaskEmptyOrDeleted) { - if (Group::kWidth == 16) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, CtrlT(3), - ctrl_t::kDeleted, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), - CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), - CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().HighestBitSet(), 4); - } else if (Group::kWidth == 8) { - ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), - ctrl_t::kDeleted, CtrlT(2), CtrlT(1), - ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().HighestBitSet(), 3); - } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; - } -} - TEST(Batch, DropDeletes) { constexpr size_t kCapacity = 63; constexpr size_t kGroupWidth = container_internal::Group::kWidth; @@ -547,30 +351,6 @@ TEST(Batch, DropDeletes) { } } -TEST(Group, CountLeadingEmptyOrDeleted) { - const std::vector empty_examples = {ctrl_t::kEmpty, ctrl_t::kDeleted}; - const std::vector full_examples = { - CtrlT(0), CtrlT(1), CtrlT(2), CtrlT(3), - CtrlT(5), CtrlT(9), CtrlT(127), ctrl_t::kSentinel}; - - for (ctrl_t empty : empty_examples) { - std::vector e(Group::kWidth, empty); - EXPECT_EQ(Group::kWidth, Group{e.data()}.CountLeadingEmptyOrDeleted()); - for (ctrl_t full : full_examples) { - for (size_t i = 0; i != Group::kWidth; ++i) { - std::vector f(Group::kWidth, empty); - f[i] = full; - EXPECT_EQ(i, Group{f.data()}.CountLeadingEmptyOrDeleted()); - } - std::vector f(Group::kWidth, empty); - f[Group::kWidth * 2 / 3] = full; - f[Group::kWidth / 2] = full; - EXPECT_EQ(Group::kWidth / 2, - Group{f.data()}.CountLeadingEmptyOrDeleted()); - } - } -} - template struct ValuePolicy { using 
slot_type = T; From 9d51fa78353589138570e03a89601da24ebbc099 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Wed, 21 May 2025 15:29:38 -0700 Subject: [PATCH 046/107] Add `Group::MaskFullOrSentinel` implementation without usage. It is intended to be used for optimization experiments for iteration. PiperOrigin-RevId: 761692460 Change-Id: Ia642e2f3a627ee24d4a09ef2edb8948d49206699 --- .../internal/hashtable_control_bytes.h | 22 +++ .../internal/hashtable_control_bytes_test.cc | 137 +++++++++++------- 2 files changed, 108 insertions(+), 51 deletions(-) diff --git a/absl/container/internal/hashtable_control_bytes.h b/absl/container/internal/hashtable_control_bytes.h index abaadc3bae2..b6068504b26 100644 --- a/absl/container/internal/hashtable_control_bytes.h +++ b/absl/container/internal/hashtable_control_bytes.h @@ -324,6 +324,15 @@ struct GroupSse2Impl { _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)))); } + // Returns a bitmask representing the positions of full or sentinel slots. + // Note: for `is_small()` tables group may contain the "same" slot twice: + // original and mirrored. + NonIterableBitMaskType MaskFullOrSentinel() const { + auto special = _mm_set1_epi8(static_cast(ctrl_t::kSentinel) - 1); + return NonIterableBitMaskType(static_cast( + _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(ctrl, special)))); + } + // Returns the number of trailing empty or deleted elements in the group. 
uint32_t CountLeadingEmptyOrDeleted() const { auto special = _mm_set1_epi8(static_cast(ctrl_t::kSentinel)); @@ -406,6 +415,15 @@ struct GroupAArch64Impl { return NonIterableBitMaskType(mask); } + NonIterableBitMaskType MaskFullOrSentinel() const { + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u8( + vcgt_s8(vreinterpret_s8_u8(ctrl), + vdup_n_s8(static_cast(ctrl_t::kSentinel) - 1))), + 0); + return NonIterableBitMaskType(mask); + } + uint32_t CountLeadingEmptyOrDeleted() const { uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vcle_s8( @@ -481,6 +499,10 @@ struct GroupPortableImpl { return NonIterableBitMaskType((ctrl & ~(ctrl << 7)) & kMsbs8Bytes); } + auto MaskFullOrSentinel() const { + return NonIterableBitMaskType((~ctrl | (ctrl << 7)) & kMsbs8Bytes); + } + uint32_t CountLeadingEmptyOrDeleted() const { // ctrl | ~(ctrl >> 7) will have the lowest bit set to zero for kEmpty and // kDeleted. We lower all other bits and count number of trailing zeros. diff --git a/absl/container/internal/hashtable_control_bytes_test.cc b/absl/container/internal/hashtable_control_bytes_test.cc index 11d8f54a511..593236a3701 100644 --- a/absl/container/internal/hashtable_control_bytes_test.cc +++ b/absl/container/internal/hashtable_control_bytes_test.cc @@ -127,125 +127,160 @@ TEST(BitMask, LeadingTrailing) { EXPECT_EQ((BitMask(0x8000000000000000).TrailingZeros()), 7); } -TEST(Group, Match) { - if (Group::kWidth == 16) { +template +class GroupTest : public testing::Test {}; +using GroupTypes = + ::testing::Types; +TYPED_TEST_SUITE(GroupTest, GroupTypes); + +TYPED_TEST(GroupTest, Match) { + using GroupType = TypeParam; + if (GroupType::kWidth == 16) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; - EXPECT_THAT(Group{group}.Match(0), ElementsAre()); - EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 11, 12, 13, 14, 15)); 
- EXPECT_THAT(Group{group}.Match(3), ElementsAre(3, 10)); - EXPECT_THAT(Group{group}.Match(5), ElementsAre(5, 9)); - EXPECT_THAT(Group{group}.Match(7), ElementsAre(7, 8)); - } else if (Group::kWidth == 8) { + EXPECT_THAT(GroupType{group}.Match(0), ElementsAre()); + EXPECT_THAT(GroupType{group}.Match(1), ElementsAre(1, 11, 12, 13, 14, 15)); + EXPECT_THAT(GroupType{group}.Match(3), ElementsAre(3, 10)); + EXPECT_THAT(GroupType{group}.Match(5), ElementsAre(5, 9)); + EXPECT_THAT(GroupType{group}.Match(7), ElementsAre(7, 8)); + } else if (GroupType::kWidth == 8) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), ctrl_t::kDeleted, CtrlT(2), CtrlT(1), ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.Match(0), ElementsAre()); - EXPECT_THAT(Group{group}.Match(1), ElementsAre(1, 5, 7)); - EXPECT_THAT(Group{group}.Match(2), ElementsAre(2, 4)); + EXPECT_THAT(GroupType{group}.Match(0), ElementsAre()); + EXPECT_THAT(GroupType{group}.Match(1), ElementsAre(1, 5, 7)); + EXPECT_THAT(GroupType{group}.Match(2), ElementsAre(2, 4)); } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + FAIL() << "No test coverage for Group::kWidth==" << GroupType::kWidth; } } -TEST(Group, MaskEmpty) { - if (Group::kWidth == 16) { +TYPED_TEST(GroupTest, MaskEmpty) { + using GroupType = TypeParam; + if (GroupType::kWidth == 16) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmpty().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmpty().HighestBitSet(), 4); - } else if (Group::kWidth == 8) { + EXPECT_THAT(GroupType{group}.MaskEmpty().LowestBitSet(), 0); + EXPECT_THAT(GroupType{group}.MaskEmpty().HighestBitSet(), 4); + } else if (GroupType::kWidth == 8) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), ctrl_t::kDeleted, CtrlT(2), CtrlT(1), ctrl_t::kSentinel, CtrlT(1)}; 
- EXPECT_THAT(Group{group}.MaskEmpty().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmpty().HighestBitSet(), 0); + EXPECT_THAT(GroupType{group}.MaskEmpty().LowestBitSet(), 0); + EXPECT_THAT(GroupType{group}.MaskEmpty().HighestBitSet(), 0); } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + FAIL() << "No test coverage for Group::kWidth==" << GroupType::kWidth; } } -TEST(Group, MaskFull) { - if (Group::kWidth == 16) { +TYPED_TEST(GroupTest, MaskFull) { + using GroupType = TypeParam; + if (GroupType::kWidth == 16) { ctrl_t group[] = { ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskFull(), + EXPECT_THAT(GroupType{group}.MaskFull(), ElementsAre(1, 3, 5, 7, 8, 9, 11, 12, 15)); - } else if (Group::kWidth == 8) { + } else if (GroupType::kWidth == 8) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskFull(), ElementsAre(1, 4, 7)); + EXPECT_THAT(GroupType{group}.MaskFull(), ElementsAre(1, 4, 7)); } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + FAIL() << "No test coverage for Group::kWidth==" << GroupType::kWidth; } } -TEST(Group, MaskNonFull) { - if (Group::kWidth == 16) { +TYPED_TEST(GroupTest, MaskNonFull) { + using GroupType = TypeParam; + if (GroupType::kWidth == 16) { ctrl_t group[] = { ctrl_t::kEmpty, CtrlT(1), ctrl_t::kDeleted, CtrlT(3), ctrl_t::kEmpty, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), CtrlT(7), CtrlT(5), ctrl_t::kDeleted, CtrlT(1), CtrlT(1), ctrl_t::kSentinel, ctrl_t::kEmpty, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskNonFull(), + EXPECT_THAT(GroupType{group}.MaskNonFull(), ElementsAre(0, 2, 4, 6, 10, 13, 14)); - } else if (Group::kWidth == 8) { + } else if (GroupType::kWidth == 8) { 
ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskNonFull(), ElementsAre(0, 2, 3, 5, 6)); + EXPECT_THAT(GroupType{group}.MaskNonFull(), ElementsAre(0, 2, 3, 5, 6)); } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + FAIL() << "No test coverage for Group::kWidth==" << GroupType::kWidth; } } -TEST(Group, MaskEmptyOrDeleted) { - if (Group::kWidth == 16) { +TYPED_TEST(GroupTest, MaskEmptyOrDeleted) { + using GroupType = TypeParam; + if (GroupType::kWidth == 16) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), ctrl_t::kEmpty, CtrlT(3), ctrl_t::kDeleted, CtrlT(5), ctrl_t::kSentinel, CtrlT(7), CtrlT(7), CtrlT(5), CtrlT(3), CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1), CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().HighestBitSet(), 4); - } else if (Group::kWidth == 8) { + EXPECT_THAT(GroupType{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); + EXPECT_THAT(GroupType{group}.MaskEmptyOrDeleted().HighestBitSet(), 4); + } else if (GroupType::kWidth == 8) { ctrl_t group[] = {ctrl_t::kEmpty, CtrlT(1), CtrlT(2), ctrl_t::kDeleted, CtrlT(2), CtrlT(1), ctrl_t::kSentinel, CtrlT(1)}; - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); - EXPECT_THAT(Group{group}.MaskEmptyOrDeleted().HighestBitSet(), 3); + EXPECT_THAT(GroupType{group}.MaskEmptyOrDeleted().LowestBitSet(), 0); + EXPECT_THAT(GroupType{group}.MaskEmptyOrDeleted().HighestBitSet(), 3); + } else { + FAIL() << "No test coverage for Group::kWidth==" << GroupType::kWidth; + } +} + +TYPED_TEST(GroupTest, MaskFullOrSentinel) { + using GroupType = TypeParam; + if (GroupType::kWidth == 16) { + ctrl_t group[] = { + ctrl_t::kEmpty, ctrl_t::kDeleted, ctrl_t::kEmpty, CtrlT(3), + ctrl_t::kDeleted, CtrlT(5), ctrl_t::kSentinel, ctrl_t::kEmpty, + ctrl_t::kEmpty, ctrl_t::kDeleted, ctrl_t::kDeleted, 
ctrl_t::kDeleted, + ctrl_t::kEmpty, ctrl_t::kDeleted, ctrl_t::kDeleted, ctrl_t::kDeleted, + }; + EXPECT_THAT(GroupType{group}.MaskFullOrSentinel().LowestBitSet(), 3); + EXPECT_THAT(GroupType{group}.MaskFullOrSentinel().HighestBitSet(), 6); + } else if (GroupType::kWidth == 8) { + ctrl_t group[] = {ctrl_t::kEmpty, ctrl_t::kDeleted, CtrlT(2), + ctrl_t::kDeleted, CtrlT(2), ctrl_t::kSentinel, + ctrl_t::kDeleted, ctrl_t::kEmpty}; + EXPECT_THAT(GroupType{group}.MaskFullOrSentinel().LowestBitSet(), 2); + EXPECT_THAT(GroupType{group}.MaskFullOrSentinel().HighestBitSet(), 5); } else { - FAIL() << "No test coverage for Group::kWidth==" << Group::kWidth; + FAIL() << "No test coverage for Group::kWidth==" << GroupType::kWidth; } } -TEST(Group, CountLeadingEmptyOrDeleted) { +TYPED_TEST(GroupTest, CountLeadingEmptyOrDeleted) { + using GroupType = TypeParam; const std::vector empty_examples = {ctrl_t::kEmpty, ctrl_t::kDeleted}; const std::vector full_examples = { CtrlT(0), CtrlT(1), CtrlT(2), CtrlT(3), CtrlT(5), CtrlT(9), CtrlT(127), ctrl_t::kSentinel}; for (ctrl_t empty : empty_examples) { - std::vector e(Group::kWidth, empty); - EXPECT_EQ(Group::kWidth, Group{e.data()}.CountLeadingEmptyOrDeleted()); + std::vector e(GroupType::kWidth, empty); + EXPECT_EQ(GroupType::kWidth, + GroupType{e.data()}.CountLeadingEmptyOrDeleted()); for (ctrl_t full : full_examples) { - for (size_t i = 0; i != Group::kWidth; ++i) { - std::vector f(Group::kWidth, empty); + for (size_t i = 0; i != GroupType::kWidth; ++i) { + std::vector f(GroupType::kWidth, empty); f[i] = full; - EXPECT_EQ(i, Group{f.data()}.CountLeadingEmptyOrDeleted()); + EXPECT_EQ(i, GroupType{f.data()}.CountLeadingEmptyOrDeleted()); } - std::vector f(Group::kWidth, empty); - f[Group::kWidth * 2 / 3] = full; - f[Group::kWidth / 2] = full; - EXPECT_EQ(Group::kWidth / 2, - Group{f.data()}.CountLeadingEmptyOrDeleted()); + std::vector f(GroupType::kWidth, empty); + f[GroupType::kWidth * 2 / 3] = full; + f[GroupType::kWidth / 2] = full; + 
EXPECT_EQ(GroupType::kWidth / 2, + GroupType{f.data()}.CountLeadingEmptyOrDeleted()); } } } From e4c43850ad008b362b53622cb3c88fd915d8f714 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 22 May 2025 18:22:20 -0700 Subject: [PATCH 047/107] Remove template alias nullability annotations. These were deprecated before the last LTS release. PiperOrigin-RevId: 762205726 Change-Id: I9d15a08967086296ecc9650a7ae0ae9c4973c405 --- CMake/AbseilDll.cmake | 1 - absl/base/BUILD.bazel | 1 - absl/base/CMakeLists.txt | 2 - absl/base/internal/nullability_deprecated.h | 106 ------------------ absl/base/nullability.h | 1 - absl/base/nullability_test.cc | 116 -------------------- 6 files changed, 227 deletions(-) delete mode 100644 absl/base/internal/nullability_deprecated.h diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index f01021bb02e..e10a6acfa31 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -25,7 +25,6 @@ set(ABSL_INTERNAL_DLL_FILES "base/internal/low_level_alloc.cc" "base/internal/low_level_alloc.h" "base/internal/low_level_scheduling.h" - "base/internal/nullability_deprecated.h" "base/internal/per_thread_tls.h" "base/internal/poison.cc" "base/internal/poison.h" diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel index 3724ad11df6..ca60d836ee3 100644 --- a/absl/base/BUILD.bazel +++ b/absl/base/BUILD.bazel @@ -82,7 +82,6 @@ cc_library( cc_library( name = "nullability", - srcs = ["internal/nullability_deprecated.h"], hdrs = ["nullability.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt index 23942c04b21..4eb1390b692 100644 --- a/absl/base/CMakeLists.txt +++ b/absl/base/CMakeLists.txt @@ -72,8 +72,6 @@ absl_cc_library( nullability HDRS "nullability.h" - SRCS - "internal/nullability_deprecated.h" DEPS absl::config absl::core_headers diff --git a/absl/base/internal/nullability_deprecated.h b/absl/base/internal/nullability_deprecated.h deleted file mode 100644 
index 1174a96eaa3..00000000000 --- a/absl/base/internal/nullability_deprecated.h +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2023 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#ifndef ABSL_BASE_INTERNAL_NULLABILITY_DEPRECATED_H_ -#define ABSL_BASE_INTERNAL_NULLABILITY_DEPRECATED_H_ - -#include "absl/base/attributes.h" -#include "absl/base/config.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace nullability_internal { - -template -using NullableImpl -#if ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) - [[clang::annotate("Nullable")]] -#endif -// Don't add the _Nullable attribute in Objective-C compiles. Many Objective-C -// projects enable the `-Wnullable-to-nonnull-conversion warning`, which is -// liable to produce false positives. 
-#if ABSL_HAVE_FEATURE(nullability_on_classes) && !defined(__OBJC__) - = T _Nullable; -#else - = T; -#endif - -template -using NonnullImpl -#if ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) - [[clang::annotate("Nonnull")]] -#endif -#if ABSL_HAVE_FEATURE(nullability_on_classes) && !defined(__OBJC__) - = T _Nonnull; -#else - = T; -#endif - -template -using NullabilityUnknownImpl -#if ABSL_HAVE_CPP_ATTRIBUTE(clang::annotate) - [[clang::annotate("Nullability_Unspecified")]] -#endif -#if ABSL_HAVE_FEATURE(nullability_on_classes) && !defined(__OBJC__) - = T _Null_unspecified; -#else - = T; -#endif - -} // namespace nullability_internal - -// The following template aliases are deprecated forms of nullability -// annotations. They have some limitations, for example, an incompatibility with -// `auto*` pointers, as `auto` cannot be used in a template argument. -// -// It is important to note that these annotations are not distinct strong -// *types*. They are alias templates defined to be equal to the underlying -// pointer type. A pointer annotated `Nonnull`, for example, is simply a -// pointer of type `T*`. -// -// Prefer the macro style annotations in `absl/base/nullability.h` instead. 
- -// absl::Nonnull, analogous to absl_nonnull -// -// Example: -// absl::Nonnull foo; -// Is equivalent to: -// int* absl_nonnull foo; -template -using Nonnull [[deprecated("Use `absl_nonnull`.")]] = - nullability_internal::NonnullImpl; - -// absl::Nullable, analogous to absl_nullable -// -// Example: -// absl::Nullable foo; -// Is equivalent to: -// int* absl_nullable foo; -template -using Nullable [[deprecated("Use `absl_nullable`.")]] = - nullability_internal::NullableImpl; - -// absl::NullabilityUnknown, analogous to absl_nullability_unknown -// -// Example: -// absl::NullabilityUnknown foo; -// Is equivalent to: -// int* absl_nullability_unknown foo; -template -using NullabilityUnknown [[deprecated("Use `absl_nullability_unknown`.")]] = - nullability_internal::NullabilityUnknownImpl; - -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_BASE_INTERNAL_NULLABILITY_DEPRECATED_H_ diff --git a/absl/base/nullability.h b/absl/base/nullability.h index 3a5d6e83e20..2796a36125b 100644 --- a/absl/base/nullability.h +++ b/absl/base/nullability.h @@ -184,7 +184,6 @@ #define ABSL_BASE_NULLABILITY_H_ #include "absl/base/config.h" -#include "absl/base/internal/nullability_deprecated.h" // ABSL_POINTERS_DEFAULT_NONNULL // diff --git a/absl/base/nullability_test.cc b/absl/base/nullability_test.cc index bccc388beb9..bccf1af45fe 100644 --- a/absl/base/nullability_test.cc +++ b/absl/base/nullability_test.cc @@ -14,16 +14,13 @@ #include "absl/base/nullability.h" -#include #include #include #include #include "gtest/gtest.h" -#include "absl/base/attributes.h" namespace { -namespace macro_annotations { void funcWithNonnullArg(int* absl_nonnull /*arg*/) {} template void funcWithDeducedNonnullArg(T* absl_nonnull /*arg*/) {} @@ -90,117 +87,4 @@ TEST(PassThroughTest, PassesThroughPointerToMemberFunction) { EXPECT_TRUE((std::is_same::value)); EXPECT_TRUE((std::is_same::value)); } -} // namespace macro_annotations - -// Allow testing of the deprecated type alias annotations. 
-ABSL_INTERNAL_DISABLE_DEPRECATED_DECLARATION_WARNING - -using ::absl::Nonnull; -using ::absl::NullabilityUnknown; -using ::absl::Nullable; -namespace type_alias_annotations { - -void funcWithNonnullArg(Nonnull /*arg*/) {} -template -void funcWithDeducedNonnullArg(Nonnull /*arg*/) {} - -TEST(NonnullTest, NonnullArgument) { - int var = 0; - funcWithNonnullArg(&var); - funcWithDeducedNonnullArg(&var); -} - -Nonnull funcWithNonnullReturn() { - static int var = 0; - return &var; -} - -TEST(NonnullTest, NonnullReturn) { - auto var = funcWithNonnullReturn(); - (void)var; -} - -TEST(PassThroughTest, PassesThroughRawPointerToInt) { - EXPECT_TRUE((std::is_same, int*>::value)); - EXPECT_TRUE((std::is_same, int*>::value)); - EXPECT_TRUE((std::is_same, int*>::value)); -} - -TEST(PassThroughTest, PassesThroughRawPointerToVoid) { - EXPECT_TRUE((std::is_same, void*>::value)); - EXPECT_TRUE((std::is_same, void*>::value)); - EXPECT_TRUE((std::is_same, void*>::value)); -} - -TEST(PassThroughTest, PassesThroughUniquePointerToInt) { - using T = std::unique_ptr; - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); -} - -TEST(PassThroughTest, PassesThroughSharedPointerToInt) { - using T = std::shared_ptr; - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); -} - -TEST(PassThroughTest, PassesThroughSharedPointerToVoid) { - using T = std::shared_ptr; - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); -} - -TEST(PassThroughTest, PassesThroughPointerToMemberObject) { - using T = decltype(&std::pair::first); - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); -} - -TEST(PassThroughTest, PassesThroughPointerToMemberFunction) { - using T = decltype(&std::unique_ptr::reset); - EXPECT_TRUE((std::is_same, 
T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); - EXPECT_TRUE((std::is_same, T>::value)); -} - -} // namespace type_alias_annotations -} // namespace - -// Nullable ADL lookup test -namespace util { -// Helper for NullableAdlTest. Returns true, denoting that argument-dependent -// lookup found this implementation of DidAdlWin. Must be in namespace -// util itself, not a nested anonymous namespace. -template -bool DidAdlWin(T*) { - return true; -} - -// Because this type is defined in namespace util, an unqualified call to -// DidAdlWin with a pointer to MakeAdlWin will find the above implementation. -struct MakeAdlWin {}; -} // namespace util - -namespace { -// Returns false, denoting that ADL did not inspect namespace util. If it -// had, the better match (T*) above would have won out over the (...) here. -bool DidAdlWin(...) { return false; } - -TEST(NullableAdlTest, NullableAddsNothingToArgumentDependentLookup) { - // Treatment: util::Nullable contributes nothing to ADL because - // int* itself doesn't. - EXPECT_FALSE(DidAdlWin((int*)nullptr)); - EXPECT_FALSE(DidAdlWin((Nullable)nullptr)); - - // Control: Argument-dependent lookup does find the implementation in - // namespace util when the underlying pointee type resides there. - EXPECT_TRUE(DidAdlWin((util::MakeAdlWin*)nullptr)); - EXPECT_TRUE(DidAdlWin((Nullable)nullptr)); -} - -ABSL_INTERNAL_RESTORE_DEPRECATED_DECLARATION_WARNING } // namespace From bd4bfed9ba625ebf9b992dc425202d59ee3ca90c Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 27 May 2025 10:45:01 -0700 Subject: [PATCH 048/107] Avoid hashing the key in prefetch() for small tables. Also: - Assert that we aren't reading control when the table has capacity 0 because it is uninitialized in that case. - Use [[maybe_unused]] instead of cast to void. - Add NOLINT for erroneous "assert can be static_assert" lint warning. 
PiperOrigin-RevId: 763859124 Change-Id: Id8b8acb24882357e75cfe751e9b62fb94befddfa --- absl/container/internal/raw_hash_set.cc | 10 +++++++--- absl/container/internal/raw_hash_set.h | 9 ++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 5238c81a2df..79d2e123c73 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -669,8 +669,12 @@ void ResizeNonSooImpl(CommonFields& common, ABSL_SWISSTABLE_ASSERT(new_capacity > policy.soo_capacity()); const size_t old_capacity = common.capacity(); - [[maybe_unused]] ctrl_t* old_ctrl = common.control(); - [[maybe_unused]] void* old_slots = common.slot_array(); + [[maybe_unused]] ctrl_t* old_ctrl; + [[maybe_unused]] void* old_slots; + if constexpr (kMode == ResizeNonSooMode::kGuaranteedAllocated) { + old_ctrl = common.control(); + old_slots = common.slot_array(); + } const size_t slot_size = policy.slot_size; const size_t slot_align = policy.slot_align; @@ -879,7 +883,7 @@ void GrowIntoSingleGroupShuffleControlBytes(ctrl_t* __restrict old_ctrl, return; } - ABSL_SWISSTABLE_ASSERT(Group::kWidth == 16); + ABSL_SWISSTABLE_ASSERT(Group::kWidth == 16); // NOLINT(misc-static-assert) // Fill the second half of the main control bytes with kEmpty. // For small capacity that may write into mirrored control bytes. 
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index f9c9b0b7e3a..002fd887f7c 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -282,10 +282,8 @@ void SwapAlloc(AllocType& lhs, AllocType& rhs, swap(lhs, rhs); } template -void SwapAlloc(AllocType& lhs, AllocType& rhs, +void SwapAlloc([[maybe_unused]] AllocType& lhs, [[maybe_unused]] AllocType& rhs, std::false_type /* propagate_on_container_swap */) { - (void)lhs; - (void)rhs; assert(lhs == rhs && "It's UB to call swap with unequal non-propagating allocators."); } @@ -949,6 +947,7 @@ class CommonFields : public CommonFieldsGenerationInfo { void* soo_data() { return heap_or_soo_.get_soo_data(); } ctrl_t* control() const { + ABSL_SWISSTABLE_ASSERT(capacity() > 0); ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap_or_soo_.control().get()); } @@ -2806,12 +2805,12 @@ class raw_hash_set { // NOTE: This is a very low level operation and should not be used without // specific benchmarks indicating its importance. template - void prefetch(const key_arg& key) const { + void prefetch([[maybe_unused]] const key_arg& key) const { if (capacity() == DefaultCapacity()) return; - (void)key; // Avoid probing if we won't be able to prefetch the addresses received. 
#ifdef ABSL_HAVE_PREFETCH prefetch_heap_block(); + if (is_small()) return; auto seq = probe(common(), hash_of(key)); PrefetchToLocalCache(control() + seq.offset()); PrefetchToLocalCache(slot_array() + seq.offset()); From 8718e816c50ab218e8148c04e3887fdc721aec9d Mon Sep 17 00:00:00 2001 From: Hannah Shi Date: Tue, 27 May 2025 11:32:33 -0700 Subject: [PATCH 049/107] PR #1895: use c++17 in podspec Imported from GitHub PR https://github.com/abseil/abseil-cpp/pull/1895 Abseil requires C++ 17, set it in generated podspec Merge 03f6a39f502026d80064786353e55bbbf01fb922 into e4c43850ad008b362b53622cb3c88fd915d8f714 Merging this change closes #1895 COPYBARA_INTEGRATE_REVIEW=https://github.com/abseil/abseil-cpp/pull/1895 from HannahShiSFB:use-cpp-17-in-podspec 03f6a39f502026d80064786353e55bbbf01fb922 PiperOrigin-RevId: 763879584 Change-Id: Ie1e3dc6f9943813a8a6cdeacfa6d122d81365ee6 --- absl/abseil.podspec.gen.py | 1 + 1 file changed, 1 insertion(+) diff --git a/absl/abseil.podspec.gen.py b/absl/abseil.podspec.gen.py index e1afa210bc9..e19f951193c 100755 --- a/absl/abseil.podspec.gen.py +++ b/absl/abseil.podspec.gen.py @@ -42,6 +42,7 @@ 'USER_HEADER_SEARCH_PATHS' => '$(inherited) "$(PODS_TARGET_SRCROOT)"', 'USE_HEADERMAP' => 'NO', 'ALWAYS_SEARCH_USER_PATHS' => 'NO', + 'CLANG_CXX_LANGUAGE_STANDARD' => 'c++17', } s.ios.deployment_target = '12.0' s.osx.deployment_target = '10.13' From 942af4809bcf752b2d1c323f8c38edbabe9ba4cd Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 27 May 2025 12:31:37 -0700 Subject: [PATCH 050/107] Rename PrepareInsertNonSoo to PrepareInsertLarge now that it's no longer used in all non-SOO cases. Also, rename SmallNonSooPrepareInsert for consistency. 
PiperOrigin-RevId: 763902938 Change-Id: Ie3893932b61a59d195dc96193574a982e40c3b41 --- absl/container/internal/raw_hash_set.cc | 25 +++++++++++-------------- absl/container/internal/raw_hash_set.h | 18 +++++++++--------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 79d2e123c73..e4122b4bcec 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -1392,7 +1392,7 @@ size_t GrowToNextCapacityAndPrepareInsert( } // namespace -std::pair SmallNonSooPrepareInsert( +std::pair PrepareInsertSmallNonSoo( CommonFields& common, const PolicyFunctions& __restrict policy, absl::FunctionRef get_hash) { ABSL_SWISSTABLE_ASSERT(common.is_small()); @@ -1501,11 +1501,11 @@ size_t RehashOrGrowToNextCapacityAndPrepareInsert( } } -// Slow path for PrepareInsertNonSoo that is called when the table has deleted +// Slow path for PrepareInsertLarge that is called when the table has deleted // slots or need to be resized or rehashed. -size_t PrepareInsertNonSooSlow(CommonFields& common, - const PolicyFunctions& __restrict policy, - size_t hash) { +size_t PrepareInsertLargeSlow(CommonFields& common, + const PolicyFunctions& __restrict policy, + size_t hash) { const GrowthInfo growth_info = common.growth_info(); ABSL_SWISSTABLE_ASSERT(!growth_info.HasNoDeletedAndGrowthLeft()); if (ABSL_PREDICT_TRUE(growth_info.HasNoGrowthLeftAndNoDeleted())) { @@ -1861,14 +1861,11 @@ void ReserveTableToFitNewSize(CommonFields& common, ReserveAllocatedTable(common, policy, new_size); } -size_t PrepareInsertNonSoo(CommonFields& common, - const PolicyFunctions& __restrict policy, - size_t hash, FindInfo target) { - const bool rehash_for_bug_detection = - common.should_rehash_for_bug_detection_on_insert() && - // Required to allow use of ResizeAllocatedTable. 
- common.capacity() > 0; - if (rehash_for_bug_detection) { +size_t PrepareInsertLarge(CommonFields& common, + const PolicyFunctions& __restrict policy, size_t hash, + FindInfo target) { + ABSL_SWISSTABLE_ASSERT(!common.is_small()); + if (common.should_rehash_for_bug_detection_on_insert()) { // Move to a different heap allocation in order to detect bugs. const size_t cap = common.capacity(); ResizeAllocatedTableWithSeedChange( @@ -1881,7 +1878,7 @@ size_t PrepareInsertNonSoo(CommonFields& common, // and growth_left is positive, we can insert at the first // empty slot in the probe sequence (target). if (ABSL_PREDICT_FALSE(!growth_info.HasNoDeletedAndGrowthLeft())) { - return PrepareInsertNonSooSlow(common, policy, hash); + return PrepareInsertLargeSlow(common, policy, hash); } PrepareInsertCommon(common); common.growth_info().OverwriteEmptyAsFull(); diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 002fd887f7c..9c4b8c05be2 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1795,7 +1795,7 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, // Returns the new control and the new slot. // Hash is only computed if the table is sampled or grew to large size // (is_small()==false). -std::pair SmallNonSooPrepareInsert( +std::pair PrepareInsertSmallNonSoo( CommonFields& common, const PolicyFunctions& policy, absl::FunctionRef get_hash); @@ -1844,11 +1844,11 @@ void* GetRefForEmptyClass(CommonFields& common); // When the table has deleted slots (according to GrowthInfo), the target // position will be searched one more time using `find_first_non_full`. // -// REQUIRES: Table is not SOO. +// REQUIRES: `!common.is_small()`. // REQUIRES: At least one non-full slot available. // REQUIRES: `target` is a valid empty position to insert. 
-size_t PrepareInsertNonSoo(CommonFields& common, const PolicyFunctions& policy, - size_t hash, FindInfo target); +size_t PrepareInsertLarge(CommonFields& common, const PolicyFunctions& policy, + size_t hash, FindInfo target); // A SwissTable. // @@ -3248,7 +3248,7 @@ class raw_hash_set { } } return {iterator_at_ptr( - SmallNonSooPrepareInsert(common(), GetPolicyFunctions(), + PrepareInsertSmallNonSoo(common(), GetPolicyFunctions(), HashKey{hash_ref(), key})), true}; } @@ -3273,10 +3273,10 @@ class raw_hash_set { auto mask_empty = g.MaskEmpty(); if (ABSL_PREDICT_TRUE(mask_empty)) { size_t target = seq.offset(mask_empty.LowestBitSet()); - return {iterator_at(PrepareInsertNonSoo(common(), GetPolicyFunctions(), - hash, - FindInfo{target, seq.index()})), - true}; + return { + iterator_at(PrepareInsertLarge(common(), GetPolicyFunctions(), hash, + FindInfo{target, seq.index()})), + true}; } seq.next(); ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); From 501d0a586e84dc0fa6fe3b5a5170bc260b19b936 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 27 May 2025 13:36:37 -0700 Subject: [PATCH 051/107] Reduce flakiness in MockDistributions.Examples test case. 
PiperOrigin-RevId: 763928380 Change-Id: I2063b45e660a9601bb52d72963b19803863c5c36 --- absl/random/mock_distributions_test.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/absl/random/mock_distributions_test.cc b/absl/random/mock_distributions_test.cc index 622aff7f92f..93af3f92ecf 100644 --- a/absl/random/mock_distributions_test.cc +++ b/absl/random/mock_distributions_test.cc @@ -69,10 +69,11 @@ TEST(MockDistributions, Examples) { .WillOnce(Return(0.001)); EXPECT_EQ(absl::Gaussian(gen, 0.0, 1.0), 0.001); - EXPECT_NE(absl::LogUniform(gen, 0, 1000000, 2), 2040); - EXPECT_CALL(absl::MockLogUniform(), Call(gen, 0, 1000000, 2)) - .WillOnce(Return(2040)); - EXPECT_EQ(absl::LogUniform(gen, 0, 1000000, 2), 2040); + const int kHigh = (1 << 30) - 1; + EXPECT_NE(absl::LogUniform(gen, 0, kHigh, 2), kHigh); + EXPECT_CALL(absl::MockLogUniform(), Call(gen, 0, kHigh, 2)) + .WillOnce(Return(kHigh)); + EXPECT_EQ(absl::LogUniform(gen, 0, kHigh, 2), kHigh); } TEST(MockDistributions, UniformUInt128BoundariesAreAllowed) { From 94be0866ac6e38b5ed34df55599f66a9cde4e06c Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Tue, 27 May 2025 14:16:17 -0700 Subject: [PATCH 052/107] Use `MaskFullOrSentinel` in `skip_empty_or_deleted`. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` name old INSTRUCTIONS/op new INSTRUCTIONS/op delta BM_Iteration/1/1 18.0 ± 0% 16.0 ± 0% -11.11% (p=0.000 n=157+157) BM_Iteration/2/2 42.0 ± 0% 36.0 ± 0% -14.29% (p=0.000 n=137+130) BM_Iteration/4/4 71.1 ±21% 61.3 ± 1% -13.80% (p=0.000 n=157+101) BM_Iteration/7/7 74.0 ± 0% 60.0 ± 0% -18.92% (p=0.000 n=157+157) BM_Iteration/10/10 139 ±20% 115 ±20% -16.75% (p=0.000 n=157+157) BM_Iteration/15/15 235 ±18% 193 ±18% -17.98% (p=0.000 n=152+157) BM_Iteration/16/16 241 ±20% 199 ±21% -17.79% (p=0.000 n=157+157) BM_Iteration/54/54 573 ± 9% 455 ± 7% -20.50% (p=0.000 n=157+157) BM_Iteration/100/100 1.11k ± 7% 0.88k ± 6% -20.76% (p=0.000 n=157+154) BM_Iteration/400/400 4.42k ± 4% 3.48k ± 4% -21.11% (p=0.000 n=157+157) BM_Iteration/100/1 125 ± 8% 111 ± 0% -10.93% (p=0.000 n=135+131) BM_Iteration/1000/10 1.69k ± 2% 1.42k ± 1% -16.22% (p=0.000 n=156+149) name old CYCLES/op new CYCLES/op delta BM_Iteration/1/1 5.02 ± 1% 5.00 ± 0% -0.41% (p=0.000 n=149+156) BM_Iteration/2/2 9.61 ±13% 8.08 ± 2% -15.87% (p=0.000 n=157+130) BM_Iteration/4/4 18.4 ±16% 16.5 ±19% -10.33% (p=0.000 n=127+124) BM_Iteration/7/7 21.0 ± 0% 13.2 ± 2% -37.39% (p=0.000 n=156+136) BM_Iteration/10/10 37.5 ±52% 35.2 ±52% -5.97% (p=0.007 n=157+157) BM_Iteration/15/15 87.4 ±41% 81.2 ±45% -7.10% (p=0.000 n=151+157) BM_Iteration/16/16 88.9 ±48% 82.7 ±45% -6.98% (p=0.001 n=157+157) BM_Iteration/54/54 159 ±27% 152 ±31% -4.81% (p=0.001 n=154+155) BM_Iteration/100/100 365 ±25% 345 ±28% -5.49% (p=0.000 n=157+157) BM_Iteration/400/400 1.54k ±13% 1.43k ±13% -7.16% (p=0.000 n=157+157) BM_Iteration/100/1 77.5 ±14% 29.8 ±29% -61.50% (p=0.000 n=157+157) BM_Iteration/1000/10 2.00k ± 3% 0.41k ±10% -79.54% (p=0.000 n=152+156) ``` PiperOrigin-RevId: 763944602 Change-Id: Ibb3aa34485dd9fc06346304d7a6ff6fb348afb6c --- absl/container/internal/raw_hash_set.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git 
a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 9c4b8c05be2..32ba5b28ee0 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -2066,10 +2066,20 @@ class raw_hash_set { // `slot_` until they reach one. void skip_empty_or_deleted() { while (IsEmptyOrDeleted(*ctrl_)) { - uint32_t shift = - GroupFullEmptyOrDeleted{ctrl_}.CountLeadingEmptyOrDeleted(); - ctrl_ += shift; - slot_ += shift; + auto mask = GroupFullEmptyOrDeleted{ctrl_}.MaskFullOrSentinel(); + // Generally it is possible to compute `shift` branchless. + // This branch is useful to: + // 1. Avoid checking `IsEmptyOrDeleted` after the shift for the most + // common dense table case. + // 2. Avoid the cost of `LowestBitSet` for extremely sparse tables. + if (ABSL_PREDICT_TRUE(mask)) { + auto shift = mask.LowestBitSet(); + ctrl_ += shift; + slot_ += shift; + return; + } + ctrl_ += Group::kWidth; + slot_ += Group::kWidth; } } From 8a5cefc62a5960408a4b3dad91650d9dcd24a93c Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Wed, 28 May 2025 12:38:35 -0700 Subject: [PATCH 053/107] Assert that SetCtrl isn't called on small tables - there are no control bytes in such cases. Fix a case of calling SetCtrl for small tables in FindNewPositionsAndTransferSlots. PiperOrigin-RevId: 764373106 Change-Id: Ie50573013291846d48d6e2d25775229d47cedeac --- absl/container/internal/raw_hash_set.cc | 10 +++++++--- absl/container/internal/raw_hash_set.h | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index e4122b4bcec..f33232054dc 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -580,9 +580,13 @@ size_t FindNewPositionsAndTransferSlots( const auto insert_slot = [&](void* slot) { size_t hash = policy.hash_slot(hash_fn, slot); - FindInfo target = - common.is_small() ? 
FindInfo{0, 0} : find_first_non_full(common, hash); - SetCtrl(common, target.offset, H2(hash), slot_size); + FindInfo target; + if (common.is_small()) { + target = FindInfo{0, 0}; + } else { + target = find_first_non_full(common, hash); + SetCtrl(common, target.offset, H2(hash), slot_size); + } policy.transfer_n(&common, SlotAddress(new_slots, target.offset, slot_size), slot, 1); return target.probe_length; diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 32ba5b28ee0..ac0ff4a0784 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1497,6 +1497,7 @@ inline void DoSanitizeOnSetCtrl(const CommonFields& c, size_t i, ctrl_t h, // mirror the value to the cloned tail if necessary. inline void SetCtrl(const CommonFields& c, size_t i, ctrl_t h, size_t slot_size) { + ABSL_SWISSTABLE_ASSERT(!c.is_small()); DoSanitizeOnSetCtrl(c, i, h, slot_size); ctrl_t* ctrl = c.control(); ctrl[i] = h; @@ -1512,6 +1513,7 @@ inline void SetCtrl(const CommonFields& c, size_t i, h2_t h, size_t slot_size) { // setting the cloned control byte. 
inline void SetCtrlInSingleGroupTable(const CommonFields& c, size_t i, ctrl_t h, size_t slot_size) { + ABSL_SWISSTABLE_ASSERT(!c.is_small()); ABSL_SWISSTABLE_ASSERT(is_single_group(c.capacity())); DoSanitizeOnSetCtrl(c, i, h, slot_size); ctrl_t* ctrl = c.control(); From 3006ff8b41cf0c4eaedb202bb4993febf71e4911 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 28 May 2025 14:55:27 -0700 Subject: [PATCH 054/107] Use a proper fix instead of a workaround for a parameter annotated absl_nonnull since the latest Clang can see through the workaround PiperOrigin-RevId: 764428456 Change-Id: I88398f924333a72abb39ffb87ecbd02f751d89eb --- absl/strings/str_format_test.cc | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index 969e1f9eee6..3f265d192c7 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -516,14 +516,11 @@ TEST_F(FormatEntryPointTest, SNPrintF) { EXPECT_EQ(result, 17); EXPECT_EQ(std::string(buffer), "NUMBER: 1234567"); - // The `output` parameter is annotated nonnull, but we want to test that - // it is never written to if the size is zero. - // Use a variable instead of passing nullptr directly to avoid a `-Wnonnull` - // warning. - char* null_output = nullptr; - result = - SNPrintF(null_output, 0, "Just checking the %s of the output.", "size"); + // Test that the buffer is never written to if the size is zero. + buffer[0] = '\0'; + result = SNPrintF(buffer, 0, "Just checking the %s of the output.", "size"); EXPECT_EQ(result, 37); + EXPECT_EQ(buffer[0], '\0'); } TEST_F(FormatEntryPointTest, SNPrintFWithV) { @@ -551,14 +548,11 @@ TEST_F(FormatEntryPointTest, SNPrintFWithV) { std::string size = "size"; - // The `output` parameter is annotated nonnull, but we want to test that - // it is never written to if the size is zero. - // Use a variable instead of passing nullptr directly to avoid a `-Wnonnull` - // warning. 
- char* null_output = nullptr; - result = - SNPrintF(null_output, 0, "Just checking the %v of the output.", size); + // Test that the buffer is never written to if the size is zero. + buffer[0] = '\0'; + result = SNPrintF(buffer, 0, "Just checking the %v of the output.", size); EXPECT_EQ(result, 37); + EXPECT_EQ(buffer[0], '\0'); } TEST(StrFormat, BehavesAsDocumented) { From 7e67f4ffaaa3cbc72e7f3b8b9827d070ab94f89a Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Thu, 29 May 2025 06:00:34 -0700 Subject: [PATCH 055/107] Use Xcode 16.3 for testing PiperOrigin-RevId: 764695105 Change-Id: Ic6968c954b07a6ff3e83acc480a2a498f1795cc8 --- ci/macos_xcode_bazel.sh | 4 ++-- ci/macos_xcode_cmake.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/macos_xcode_bazel.sh b/ci/macos_xcode_bazel.sh index b05cfac2267..aaedb355f67 100755 --- a/ci/macos_xcode_bazel.sh +++ b/ci/macos_xcode_bazel.sh @@ -19,8 +19,8 @@ set -euox pipefail -# Use Xcode 16.0 -sudo xcode-select -s /Applications/Xcode_16.0.app/Contents/Developer +# Use Xcode 16.3 +sudo xcode-select -s /Applications/Xcode_16.3.app/Contents/Developer if [[ -z ${ABSEIL_ROOT:-} ]]; then ABSEIL_ROOT="$(realpath $(dirname ${0})/..)" diff --git a/ci/macos_xcode_cmake.sh b/ci/macos_xcode_cmake.sh index 6811b87d328..84a746b566f 100755 --- a/ci/macos_xcode_cmake.sh +++ b/ci/macos_xcode_cmake.sh @@ -16,8 +16,8 @@ set -euox pipefail -# Use Xcode 16.0 -sudo xcode-select -s /Applications/Xcode_16.0.app/Contents/Developer +# Use Xcode 16.3 +sudo xcode-select -s /Applications/Xcode_16.3.app/Contents/Developer export CMAKE_BUILD_PARALLEL_LEVEL=$(sysctl -n hw.ncpu) export CTEST_PARALLEL_LEVEL=$(sysctl -n hw.ncpu) From 4d8e467f282bf9657caa0f1053b5032cd2cd7676 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Thu, 29 May 2025 07:46:46 -0700 Subject: [PATCH 056/107] Fix CI on macOS Sequoia This includes fixes for both the CI scripts in the new OS image and a fix for newly detected missing nullability annotations in 
string_view with an upgraded Clang. It is also no longer necessary to uninstall google-benchmark since it is not installed by default in the new image. PiperOrigin-RevId: 764725127 Change-Id: I6ea8c55b4d9a9afd261bf4ae9c5e8239675ea7ca --- absl/strings/string_view.h | 6 +++--- ci/macos_xcode_bazel.sh | 3 --- ci/macos_xcode_cmake.sh | 2 ++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 9a1933b611b..00769b5f39c 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -376,7 +376,7 @@ class ABSL_ATTRIBUTE_VIEW string_view { // // Copies the contents of the `string_view` at offset `pos` and length `n` // into `buf`. - size_type copy(char* buf, size_type n, size_type pos = 0) const { + size_type copy(char* absl_nonnull buf, size_type n, size_type pos = 0) const { if (ABSL_PREDICT_FALSE(pos > length_)) { base_internal::ThrowStdOutOfRange("absl::string_view::copy"); } @@ -624,7 +624,7 @@ class ABSL_ATTRIBUTE_VIEW string_view { // Overload of `string_view::starts_with()` that returns true if the // `string_view` starts with the C-style prefix `s`. - constexpr bool starts_with(const char* s) const { + constexpr bool starts_with(const char* absl_nonnull s) const { return starts_with(string_view(s)); } @@ -649,7 +649,7 @@ class ABSL_ATTRIBUTE_VIEW string_view { // Overload of `string_view::ends_with()` that returns true if the // `string_view` ends with the C-style suffix `s`. 
- constexpr bool ends_with(const char* s) const { + constexpr bool ends_with(const char* absl_nonnull s) const { return ends_with(string_view(s)); } #endif // ABSL_INTERNAL_CPLUSPLUS_LANG >= 202002L diff --git a/ci/macos_xcode_bazel.sh b/ci/macos_xcode_bazel.sh index aaedb355f67..f19cd5052b4 100755 --- a/ci/macos_xcode_bazel.sh +++ b/ci/macos_xcode_bazel.sh @@ -54,9 +54,6 @@ if [[ -n "${ALTERNATE_OPTIONS:-}" ]]; then cp ${ALTERNATE_OPTIONS:-} absl/base/options.h || exit 1 fi -# Avoid using the system version of google-benchmark. -brew uninstall google-benchmark - ${BAZEL_BIN} test ... \ --copt="-DGTEST_REMOVE_LEGACY_TEST_CASEAPI_=1" \ --copt="-Werror" \ diff --git a/ci/macos_xcode_cmake.sh b/ci/macos_xcode_cmake.sh index 84a746b566f..5b11b895d80 100755 --- a/ci/macos_xcode_cmake.sh +++ b/ci/macos_xcode_cmake.sh @@ -19,6 +19,8 @@ set -euox pipefail # Use Xcode 16.3 sudo xcode-select -s /Applications/Xcode_16.3.app/Contents/Developer +brew install cmake + export CMAKE_BUILD_PARALLEL_LEVEL=$(sysctl -n hw.ncpu) export CTEST_PARALLEL_LEVEL=$(sysctl -n hw.ncpu) From 61103b948d587aff46f259a23bfa7e28385561e7 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Thu, 29 May 2025 09:01:26 -0700 Subject: [PATCH 057/107] `absl::string_view`: Add a debug assert to the single-argument constructor that the argument is not `nullptr`. Passing `nullptr` to the single-argument constructor of `std::string_view` is undefined behavior. `absl::string_view` has long accepted `nullptr` arguments to the single-argument constructor. This debug assert will assist users in finding uses of `absl::string_view(nullptr)` to allow migration to `std::string_view`. If you find that this assert is firing, consider using `absl::NullSafeStringView` to safely construct the `string_view`. 
PiperOrigin-RevId: 764751809 Change-Id: I3d1ee1c01fd7faf71523122abc254372374374f8 --- absl/strings/string_view.h | 4 ++- absl/strings/string_view_test.cc | 50 +------------------------------- 2 files changed, 4 insertions(+), 50 deletions(-) diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 00769b5f39c..e382618bb93 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -198,7 +198,9 @@ class ABSL_ATTRIBUTE_VIEW string_view { // The length check is skipped since it is unnecessary and causes code bloat. constexpr string_view( // NOLINT(runtime/explicit) const char* absl_nonnull str) - : ptr_(str), length_(str ? StrlenInternal(str) : 0) {} + : ptr_(str), length_(str ? StrlenInternal(str) : 0) { + assert(str != nullptr); + } // Constructor of a `string_view` from a `const char*` and length. constexpr string_view(const char* absl_nullable data, size_type len) diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 0a2a7a97b4b..adf4d1b45bf 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -870,42 +870,10 @@ TEST(StringViewTest, FrontBackEmpty) { #endif } -// `std::string_view::string_view(const char*)` calls -// `std::char_traits::length(const char*)` to get the string length. In -// libc++, it doesn't allow `nullptr` in the constexpr context, with the error -// "read of dereferenced null pointer is not allowed in a constant expression". -// At run time, the behavior of `std::char_traits::length()` on `nullptr` is -// undefined by the standard and usually results in crash with libc++. -// GCC also started rejected this in libstdc++ starting in GCC9. -// In MSVC, creating a constexpr string_view from nullptr also triggers an -// "unevaluable pointer value" error. This compiler implementation conforms -// to the standard, but `absl::string_view` implements a different -// behavior for historical reasons. 
We work around tests that construct -// `string_view` from `nullptr` when using libc++. -#if !defined(ABSL_USES_STD_STRING_VIEW) || \ - (!(defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE >= 9) && \ - !defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) -#define ABSL_HAVE_STRING_VIEW_FROM_NULLPTR 1 -#endif - -TEST(StringViewTest, NULLInput) { +TEST(StringViewTest, DefaultConstructor) { absl::string_view s; EXPECT_EQ(s.data(), nullptr); EXPECT_EQ(s.size(), 0u); - -#ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR - // The `str` parameter is annotated nonnull, but we want to test the defensive - // null check. Use a variable instead of passing nullptr directly to avoid a - // `-Wnonnull` warning. - char* null_str = nullptr; - s = absl::string_view(null_str); - EXPECT_EQ(s.data(), nullptr); - EXPECT_EQ(s.size(), 0u); - - // .ToString() on a absl::string_view with nullptr should produce the empty - // string. - EXPECT_EQ("", std::string(s)); -#endif // ABSL_HAVE_STRING_VIEW_FROM_NULLPTR } TEST(StringViewTest, Comparisons2) { @@ -1086,16 +1054,6 @@ TEST(StringViewTest, ConstexprCompiles) { // know at compile time that the argument is nullptr and complain because the // parameter is annotated nonnull. We hence turn the warning off for this // test. 
-#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wnonnull" -#endif -#ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR - constexpr absl::string_view cstr(nullptr); -#endif -#if defined(__clang__) -#pragma clang diagnostic pop -#endif constexpr absl::string_view cstr_len("cstr", 4); #if defined(ABSL_USES_STD_STRING_VIEW) @@ -1163,12 +1121,6 @@ TEST(StringViewTest, ConstexprCompiles) { constexpr absl::string_view::iterator const_end_empty = sp.end(); EXPECT_EQ(const_begin_empty, const_end_empty); -#ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR - constexpr absl::string_view::iterator const_begin_nullptr = cstr.begin(); - constexpr absl::string_view::iterator const_end_nullptr = cstr.end(); - EXPECT_EQ(const_begin_nullptr, const_end_nullptr); -#endif // ABSL_HAVE_STRING_VIEW_FROM_NULLPTR - constexpr absl::string_view::iterator const_begin = cstr_len.begin(); constexpr absl::string_view::iterator const_end = cstr_len.end(); constexpr absl::string_view::size_type const_size = cstr_len.size(); From 2dd8036891687917ad5296281ef5c867c3538950 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Thu, 29 May 2025 09:16:34 -0700 Subject: [PATCH 058/107] Update CI to a more recent Clang on Linux x86-64 /opt/llvm/clang/bin/clang -v clang version 21.0.0git (https://github.com/llvm/llvm-project 2dee1a2de3d2cc774fbd43507cf4320418fbf279) This change adds `-Wno-unused-command-line-argument` to the Clang builds since Bazel is now providing an unused `-c` to rules that do header syntax checking only. Rules that were missing copts which causes them to encounter this error are also fixed. 
PiperOrigin-RevId: 764757573 Change-Id: I1eba676327a6023c5b8afb54499e4a3a72c2cdad --- absl/base/BUILD.bazel | 1 + absl/copts/GENERATED_AbseilCopts.cmake | 2 ++ absl/copts/GENERATED_copts.bzl | 2 ++ absl/copts/copts.py | 1 + absl/profiling/BUILD.bazel | 2 ++ absl/strings/BUILD.bazel | 1 + absl/strings/CMakeLists.txt | 2 +- ci/linux_docker_containers.sh | 4 ++-- 8 files changed, 12 insertions(+), 3 deletions(-) diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel index ca60d836ee3..0debd3e74e9 100644 --- a/absl/base/BUILD.bazel +++ b/absl/base/BUILD.bazel @@ -719,6 +719,7 @@ cc_library( testonly = True, srcs = ["internal/scoped_set_env.cc"], hdrs = ["internal/scoped_set_env.h"], + copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, visibility = [ "//absl:__subpackages__", diff --git a/absl/copts/GENERATED_AbseilCopts.cmake b/absl/copts/GENERATED_AbseilCopts.cmake index 7d8af924ba7..32b97fcb307 100644 --- a/absl/copts/GENERATED_AbseilCopts.cmake +++ b/absl/copts/GENERATED_AbseilCopts.cmake @@ -121,6 +121,7 @@ list(APPEND ABSL_LLVM_FLAGS "-Wno-implicit-float-conversion" "-Wno-implicit-int-float-conversion" "-Wno-unknown-warning-option" + "-Wno-unused-command-line-argument" "-DNOMINMAX" ) @@ -160,6 +161,7 @@ list(APPEND ABSL_LLVM_TEST_FLAGS "-Wno-implicit-float-conversion" "-Wno-implicit-int-float-conversion" "-Wno-unknown-warning-option" + "-Wno-unused-command-line-argument" "-DNOMINMAX" "-Wno-deprecated-declarations" "-Wno-implicit-int-conversion" diff --git a/absl/copts/GENERATED_copts.bzl b/absl/copts/GENERATED_copts.bzl index 23896e9dc25..8d7219044ee 100644 --- a/absl/copts/GENERATED_copts.bzl +++ b/absl/copts/GENERATED_copts.bzl @@ -122,6 +122,7 @@ ABSL_LLVM_FLAGS = [ "-Wno-implicit-float-conversion", "-Wno-implicit-int-float-conversion", "-Wno-unknown-warning-option", + "-Wno-unused-command-line-argument", "-DNOMINMAX", ] @@ -161,6 +162,7 @@ ABSL_LLVM_TEST_FLAGS = [ "-Wno-implicit-float-conversion", "-Wno-implicit-int-float-conversion", 
"-Wno-unknown-warning-option", + "-Wno-unused-command-line-argument", "-DNOMINMAX", "-Wno-deprecated-declarations", "-Wno-implicit-int-conversion", diff --git a/absl/copts/copts.py b/absl/copts/copts.py index 8cf8f310724..e6c4385d9f9 100644 --- a/absl/copts/copts.py +++ b/absl/copts/copts.py @@ -84,6 +84,7 @@ # Disable warnings on unknown warning flags (when warning flags are # unknown on older compiler versions) "-Wno-unknown-warning-option", + "-Wno-unused-command-line-argument", # Don't define min and max macros (Build on Windows using clang) "-DNOMINMAX", ] diff --git a/absl/profiling/BUILD.bazel b/absl/profiling/BUILD.bazel index ee4800de48f..4a3c64430e5 100644 --- a/absl/profiling/BUILD.bazel +++ b/absl/profiling/BUILD.bazel @@ -49,6 +49,7 @@ cc_library( cc_test( name = "sample_recorder_test", srcs = ["internal/sample_recorder_test.cc"], + copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, tags = [ "no_test_wasm", @@ -69,6 +70,7 @@ cc_library( name = "exponential_biased", srcs = ["internal/exponential_biased.cc"], hdrs = ["internal/exponential_biased.h"], + copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, visibility = [ "//absl:__subpackages__", diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index bb152acc325..0b9d37298f8 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -1462,6 +1462,7 @@ cc_library( testonly = True, srcs = ["internal/pow10_helper.cc"], hdrs = ["internal/pow10_helper.h"], + copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:private"], deps = ["//absl/base:config"], diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 547ef268690..32ad263c1ef 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -663,7 +663,7 @@ absl_cc_library( SRCS "internal/pow10_helper.cc" COPTS - ${ABSL_TEST_COPTS} + ${ABSL_DEFAULT_COPTS} DEPS absl::config TESTONLY diff --git a/ci/linux_docker_containers.sh 
b/ci/linux_docker_containers.sh index 0f454716681..cb0904c2c3a 100644 --- a/ci/linux_docker_containers.sh +++ b/ci/linux_docker_containers.sh @@ -16,7 +16,7 @@ # Test scripts should source this file to get the identifiers. readonly LINUX_ALPINE_CONTAINER="gcr.io/google.com/absl-177019/alpine:20230612" -readonly LINUX_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20250430" +readonly LINUX_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20250527" readonly LINUX_ARM_CLANG_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_arm_hybrid-latest:20250430" -readonly LINUX_GCC_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20250430" +readonly LINUX_GCC_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20250527" readonly LINUX_GCC_FLOOR_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-floor:20250430" From 1f10c0bee39f39a339a1fe871fdba8d7af867bc0 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Fri, 30 May 2025 10:05:04 -0700 Subject: [PATCH 059/107] Add a test case that -1.0 and 1.0 have different hashes. PiperOrigin-RevId: 765235417 Change-Id: I13b8077d40250147416b6fd5a5becf59ea513379 --- absl/hash/hash_test.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 7582f54431f..03642b540b8 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -1224,4 +1224,9 @@ TEST(HashOf, AutoReturnTypeUser) { absl::Hash{}(AutoReturnTypeUser{1, s})); } +TEST(HashOf, DoubleSignCollision) { + // These values differ only in their most significant bit. + EXPECT_NE(absl::HashOf(-1.0), absl::HashOf(1.0)); +} + } // namespace From fbb1d06795343e18dde1149eb9794bbd7b11c305 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Fri, 30 May 2025 12:00:14 -0700 Subject: [PATCH 060/107] Make `combine_contiguous` to mix length in a weak way by adding `size << 24`, so that we can avoid a separate mixing of size later. 
The empty range is mixing 0x57 byte. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was also considered to add just `size` without transformation. But this causes collisions for small contiguous strings that may be quite a common case. So it is better to avoid additional disruptions for client code. ``` name old INSTRUCTIONS/op new INSTRUCTIONS/op delta BM_latency_AbslHash_Int32 9.00 ± 0% 9.00 ± 0% ~ (all samples are equal) BM_latency_AbslHash_Int64 9.00 ± 0% 9.00 ± 0% ~ (all samples are equal) BM_latency_AbslHash_String3 32.0 ± 0% 34.0 ± 0% +6.25% (p=0.000 n=57+57) BM_latency_AbslHash_String5 31.0 ± 5% 32.8 ± 5% +6.00% (p=0.000 n=57+57) BM_latency_AbslHash_String9 28.8 ± 4% 30.2 ± 5% +4.94% (p=0.000 n=53+53) BM_latency_AbslHash_String17 27.4 ± 3% 27.6 ± 4% +0.64% (p=0.032 n=53+52) BM_latency_AbslHash_String33 32.8 ± 4% 32.5 ± 8% ~ (p=0.079 n=51+53) BM_latency_AbslHash_String65 55.9 ±14% 55.1 ±10% ~ (p=0.288 n=52+51) BM_latency_AbslHash_String257 122 ±11% 121 ±11% ~ (p=0.432 n=50+55) name old CYCLES/op new CYCLES/op delta BM_latency_AbslHash_Int32 16.0 ± 2% 16.1 ± 4% ~ (p=0.433 n=56+57) BM_latency_AbslHash_Int64 16.4 ± 4% 16.5 ± 3% ~ (p=0.076 n=56+46) BM_latency_AbslHash_String3 22.6 ± 0% 21.0 ± 1% -6.83% (p=0.000 n=52+47) BM_latency_AbslHash_String5 22.2 ± 5% 21.2 ± 4% -4.53% (p=0.000 n=54+56) BM_latency_AbslHash_String9 22.1 ±11% 20.4 ± 5% -7.72% (p=0.000 n=51+55) BM_latency_AbslHash_String17 21.4 ± 3% 19.6 ± 4% -8.52% (p=0.000 n=54+51) BM_latency_AbslHash_String33 23.5 ± 4% 20.8 ± 7% -11.42% (p=0.000 n=53+54) BM_latency_AbslHash_String65 28.6 ± 6% 27.0 ± 7% -5.47% (p=0.000 n=53+55) BM_latency_AbslHash_String257 46.7 ± 9% 44.4 ±10% -4.93% (p=0.000 n=52+56) ``` PiperOrigin-RevId: 765281185 Change-Id: I0754877ca7eaa4d187390fe631ef407d379ccae7 --- absl/container/fixed_array.h | 3 +- absl/container/inlined_vector.h | 4 +- absl/hash/hash_test.cc | 14 +++- absl/hash/internal/hash.cc | 22 ++++-- absl/hash/internal/hash.h | 
104 +++++++++++++++++----------- absl/hash/internal/spy_hash_state.h | 9 ++- absl/strings/cord.h | 3 +- absl/types/span.h | 3 +- 8 files changed, 104 insertions(+), 58 deletions(-) diff --git a/absl/container/fixed_array.h b/absl/container/fixed_array.h index 6c238fc381f..b08735f4619 100644 --- a/absl/container/fixed_array.h +++ b/absl/container/fixed_array.h @@ -392,8 +392,7 @@ class ABSL_ATTRIBUTE_WARN_UNUSED FixedArray { template friend H AbslHashValue(H h, const FixedArray& v) { - return H::combine(H::combine_contiguous(std::move(h), v.data(), v.size()), - hash_internal::WeaklyMixedInteger{v.size()}); + return H::combine_contiguous(std::move(h), v.data(), v.size()); } private: diff --git a/absl/container/inlined_vector.h b/absl/container/inlined_vector.h index f871b349134..c53cbd21c1d 100644 --- a/absl/container/inlined_vector.h +++ b/absl/container/inlined_vector.h @@ -1008,9 +1008,7 @@ bool operator>=(const absl::InlinedVector& a, // call this directly. template H AbslHashValue(H h, const absl::InlinedVector& a) { - auto size = a.size(); - return H::combine(H::combine_contiguous(std::move(h), a.data(), size), - hash_internal::WeaklyMixedInteger{size}); + return H::combine_contiguous(std::move(h), a.data(), a.size()); } ABSL_NAMESPACE_END diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 03642b540b8..85e34c915f0 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -171,6 +171,9 @@ TEST(HashValueTest, PointerAlignment) { constexpr size_t kLog2NumValues = 5; constexpr size_t kNumValues = 1 << kLog2NumValues; + int64_t test_count = 0; + int64_t total_stuck_bit_count = 0; + for (size_t align = 1; align < kTotalSize / kNumValues; align < 8 ? align += 1 : align < 1024 ? align += 8 : align += 32) { SCOPED_TRACE(align); @@ -188,9 +191,16 @@ TEST(HashValueTest, PointerAlignment) { // Limit the scope to the bits we would be using for Swisstable. 
constexpr size_t kMask = (1 << (kLog2NumValues + 7)) - 1; size_t stuck_bits = (~bits_or | bits_and) & kMask; - // Test that there are at most 3 stuck bits. - EXPECT_LE(absl::popcount(stuck_bits), 3) << "0x" << std::hex << stuck_bits; + int stuck_bit_count = absl::popcount(stuck_bits); + // Test that there are at most 4 stuck bits. + EXPECT_LE(stuck_bit_count, 4) << "0x" << std::hex << stuck_bits; + + total_stuck_bit_count += stuck_bit_count; + ++test_count; } + // Test that average across alignments are at most 0.2 stuck bits. + // As of 2025-05-30 test is also passing with 0.07 stuck bits. + EXPECT_LE(total_stuck_bit_count, 0.2 * test_count); } TEST(HashValueTest, PointerToMember) { diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index b185a0acfef..1b47e614d4a 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -27,27 +27,39 @@ ABSL_NAMESPACE_BEGIN namespace hash_internal { uint64_t MixingHashState::CombineLargeContiguousImpl32( - uint64_t state, const unsigned char* first, size_t len) { + const unsigned char* first, size_t len, uint64_t state) { while (len >= PiecewiseChunkSize()) { - state = Mix( - state ^ hash_internal::CityHash32(reinterpret_cast(first), + // TODO(b/417141985): avoid code duplication with CombineContiguousImpl. + state = + Mix(PrecombineLengthMix(state, PiecewiseChunkSize()) ^ + hash_internal::CityHash32(reinterpret_cast(first), PiecewiseChunkSize()), - kMul); + kMul); len -= PiecewiseChunkSize(); first += PiecewiseChunkSize(); } + // Do not call CombineContiguousImpl for empty range since it is modifying + // state. + if (len == 0) { + return state; + } // Handle the remainder. 
return CombineContiguousImpl(state, first, len, std::integral_constant{}); } uint64_t MixingHashState::CombineLargeContiguousImpl64( - uint64_t state, const unsigned char* first, size_t len) { + const unsigned char* first, size_t len, uint64_t state) { while (len >= PiecewiseChunkSize()) { state = Hash64(first, PiecewiseChunkSize(), state); len -= PiecewiseChunkSize(); first += PiecewiseChunkSize(); } + // Do not call CombineContiguousImpl for empty range since it is modifying + // state. + if (len == 0) { + return state; + } // Handle the remainder. return CombineContiguousImpl(state, first, len, std::integral_constant{}); diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index f400c7be1a7..fa88227b36e 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -127,7 +127,7 @@ constexpr size_t PiecewiseChunkSize() { return 1024; } // return combiner.finalize(std::move(state)); class PiecewiseCombiner { public: - PiecewiseCombiner() : position_(0) {} + PiecewiseCombiner() = default; PiecewiseCombiner(const PiecewiseCombiner&) = delete; PiecewiseCombiner& operator=(const PiecewiseCombiner&) = delete; @@ -157,7 +157,8 @@ class PiecewiseCombiner { private: unsigned char buf_[PiecewiseChunkSize()]; - size_t position_; + size_t position_ = 0; + bool added_something_ = false; }; // is_hashable() @@ -620,9 +621,7 @@ H AbslHashValue(H hash_state, const std::shared_ptr& ptr) { // `eq()` member isn't equivalent to `==` on the underlying character type. template H AbslHashValue(H hash_state, absl::string_view str) { - return H::combine( - H::combine_contiguous(std::move(hash_state), str.data(), str.size()), - WeaklyMixedInteger{str.size()}); + return H::combine_contiguous(std::move(hash_state), str.data(), str.size()); } // Support std::wstring, std::u16string and std::u32string. 
@@ -633,9 +632,7 @@ template , Alloc>& str) { - return H::combine( - H::combine_contiguous(std::move(hash_state), str.data(), str.size()), - WeaklyMixedInteger{str.size()}); + return H::combine_contiguous(std::move(hash_state), str.data(), str.size()); } // Support std::wstring_view, std::u16string_view and std::u32string_view. @@ -644,9 +641,7 @@ template ::value || std::is_same::value>> H AbslHashValue(H hash_state, std::basic_string_view str) { - return H::combine( - H::combine_contiguous(std::move(hash_state), str.data(), str.size()), - WeaklyMixedInteger{str.size()}); + return H::combine_contiguous(std::move(hash_state), str.data(), str.size()); } #if defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \ @@ -728,9 +723,8 @@ template typename std::enable_if::value && !std::is_same::value, H>::type AbslHashValue(H hash_state, const std::vector& vector) { - return H::combine(H::combine_contiguous(std::move(hash_state), vector.data(), - vector.size()), - WeaklyMixedInteger{vector.size()}); + return H::combine_contiguous(std::move(hash_state), vector.data(), + vector.size()); } // AbslHashValue special cases for hashing std::vector @@ -957,7 +951,23 @@ hash_range_or_bytes(H hash_state, const T* data, size_t size) { for (const auto end = data + size; data < end; ++data) { hash_state = H::combine(std::move(hash_state), *data); } - return hash_state; + return H::combine(std::move(hash_state), + hash_internal::WeaklyMixedInteger{size}); +} + +// Extremely weak mixture of length that is added to the state before combining +// the data. It is used only for small strings. +inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) { + // The length is always one byte here. We place it to 4th byte for the + // following reasons: + // 1. 4th byte is unused for very short strings 0-3 bytes. + // 2. 4th byte is duplicated for 4 bytes string. + // 3. 4th byte is in the middle and mixed well for 5-8 bytes strings. 
+ // + // There were experiments with adding just `len` here. + // Also seems have slightly better performance overall, that gives collisions + // for small strings. + return state + (uint64_t{len} << 24); } #if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \ @@ -1201,8 +1211,8 @@ class ABSL_DLL MixingHashState : public HashStateBase { } else if (len > 0) { v = Read1To3(first, len); } else { - // Empty ranges have no effect. - return state; + // Empty string must modify the state. + v = 0x57; } return WeakMix(state, v); } @@ -1238,12 +1248,10 @@ class ABSL_DLL MixingHashState : public HashStateBase { // Slow dispatch path for calls to CombineContiguousImpl with a size argument // larger than PiecewiseChunkSize(). Has the same effect as calling // CombineContiguousImpl() repeatedly with the chunk stride size. - static uint64_t CombineLargeContiguousImpl32(uint64_t state, - const unsigned char* first, - size_t len); - static uint64_t CombineLargeContiguousImpl64(uint64_t state, - const unsigned char* first, - size_t len); + static uint64_t CombineLargeContiguousImpl32(const unsigned char* first, + size_t len, uint64_t state); + static uint64_t CombineLargeContiguousImpl64(const unsigned char* first, + size_t len, uint64_t state); // Reads 9 to 16 bytes from p. // The first 8 bytes are in .first, and the rest of the bytes are in .second @@ -1266,16 +1274,18 @@ class ABSL_DLL MixingHashState : public HashStateBase { #endif } - // Reads 4 to 8 bytes from p. Some input bytes may be duplicated in output. + // Reads 4 to 8 bytes from p. + // Bytes are permuted and some input bytes may be duplicated in output. static uint64_t Read4To8(const unsigned char* p, size_t len) { - // If `len < 8`, we duplicate bytes in the middle. - // E.g.: + // If `len < 8`, we duplicate bytes. We always put low memory at the end. + // E.g., on little endian platforms: // `ABCD` will be read as `ABCDABCD`. - // `ABCDE` will be read as `ABCDBCDE`. - // `ABCDEF` will be read as `ABCDCDEF`. 
- // `ABCDEFG` will be read as `ABCDDEFG`. + // `ABCDE` will be read as `BCDEABCD`. + // `ABCDEF` will be read as `CDEFABCD`. + // `ABCDEFG` will be read as `DEFGABCD`. + // `ABCDEFGH` will be read as `EFGHABCD`. // We also do not care about endianness. On big-endian platforms, bytes will - // be shuffled (it's fine). We always shift low memory by 32, because that + // be permuted differently. We always shift low memory by 32, because that // can be pipelined earlier. Reading high memory requires computing // `p + len - 4`. uint64_t most_significant = @@ -1373,15 +1383,17 @@ inline uint64_t MixingHashState::CombineContiguousImpl( // For large values we use CityHash, for small ones we use custom low latency // hash. if (len <= 8) { - return CombineSmallContiguousImpl(state, first, len); + return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first, + len); } if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { // TODO(b/417141985): expose and use CityHash32WithSeed. - return Mix(state ^ hash_internal::CityHash32( - reinterpret_cast(first), len), + return Mix(PrecombineLengthMix(state, len) ^ + hash_internal::CityHash32( + reinterpret_cast(first), len), kMul); } - return CombineLargeContiguousImpl32(state, first, len); + return CombineLargeContiguousImpl32(first, len, state); } // Overload of MixingHashState::CombineContiguousImpl() @@ -1391,18 +1403,25 @@ inline uint64_t MixingHashState::CombineContiguousImpl( // For large values we use LowLevelHash or CityHash depending on the platform, // for small ones we use custom low latency hash. 
if (len <= 8) { - return CombineSmallContiguousImpl(state, first, len); + return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first, + len); } if (len <= 16) { - return CombineContiguousImpl9to16(state, first, len); + return CombineContiguousImpl9to16(PrecombineLengthMix(state, len), first, + len); } if (len <= 32) { - return CombineContiguousImpl17to32(state, first, len); + return CombineContiguousImpl17to32(PrecombineLengthMix(state, len), first, + len); } if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { + // Length is mixed into the state inside of Hash64. return Hash64(first, len, state); } - return CombineLargeContiguousImpl64(state, first, len); + // We must not mix length to the state here because calling + // CombineContiguousImpl twice with PiecewiseChunkSize() must be equivalent + // to calling CombineLargeContiguousImpl once with 2 * PiecewiseChunkSize(). + return CombineLargeContiguousImpl64(first, len, state); } struct AggregateBarrier {}; @@ -1462,7 +1481,7 @@ H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, position_ += size; return state; } - + added_something_ = true; // If the buffer is partially filled we need to complete the buffer // and hash it. if (position_ != 0) { @@ -1488,7 +1507,12 @@ H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, // HashStateBase::PiecewiseCombiner::finalize() template H PiecewiseCombiner::finalize(H state) { - // Hash the remainder left in the buffer, which may be empty + // Do not call combine_contiguous with empty remainder since it is modifying + // state. + if (added_something_ && position_ == 0) { + return state; + } + // We still call combine_contiguous for the entirely empty buffer. 
return H::combine_contiguous(std::move(state), buf_, position_); } diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h index e403113b0ea..823e1e90fcd 100644 --- a/absl/hash/internal/spy_hash_state.h +++ b/absl/hash/internal/spy_hash_state.h @@ -151,6 +151,9 @@ class SpyHashStateImpl : public HashStateBase> { static SpyHashStateImpl combine_contiguous(SpyHashStateImpl hash_state, const unsigned char* begin, size_t size) { + if (size == 0) { + return SpyHashStateImpl::combine_raw(std::move(hash_state), 0); + } const size_t large_chunk_stride = PiecewiseChunkSize(); // Combining a large contiguous buffer must have the same effect as // doing it piecewise by the stride length, followed by the (possibly @@ -165,6 +168,7 @@ class SpyHashStateImpl : public HashStateBase> { if (size > 0) { hash_state.hash_representation_.emplace_back( reinterpret_cast(begin), size); + hash_state = SpyHashStateImpl::combine_raw(std::move(hash_state), size); } return hash_state; } @@ -224,8 +228,9 @@ class SpyHashStateImpl : public HashStateBase> { // Combines raw data from e.g. integrals/floats/pointers/etc. 
static SpyHashStateImpl combine_raw(SpyHashStateImpl state, uint64_t value) { - const unsigned char* data = reinterpret_cast(&value); - return SpyHashStateImpl::combine_contiguous(std::move(state), data, 8); + state.hash_representation_.emplace_back( + reinterpret_cast(&value), 8); + return state; } // This is true if SpyHashStateImpl has been passed to a call of diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 5aa232e7be4..edda551ae83 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -1098,8 +1098,7 @@ class Cord { hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(), chunk.size()); }); - return H::combine(combiner.finalize(std::move(hash_state)), - hash_internal::WeaklyMixedInteger{size()}); + return combiner.finalize(std::move(hash_state)); } friend class CrcCord; diff --git a/absl/types/span.h b/absl/types/span.h index 39e6a8a5d75..772681c8493 100644 --- a/absl/types/span.h +++ b/absl/types/span.h @@ -499,8 +499,7 @@ class ABSL_ATTRIBUTE_VIEW Span { // Support for absl::Hash. template friend H AbslHashValue(H h, Span v) { - return H::combine(H::combine_contiguous(std::move(h), v.data(), v.size()), - hash_internal::WeaklyMixedInteger{v.size()}); + return H::combine_contiguous(std::move(h), v.data(), v.size()); } private: From e1ff6a3339138790a443b349647a83ddcb798ffa Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 30 May 2025 14:49:48 -0700 Subject: [PATCH 061/107] Fix a false nullability warning in [Q]CHECK_OK by replacing nullptr with an empty char* PiperOrigin-RevId: 765346332 Change-Id: I3a56ef48c9f5b241dcbc56b0a4ee5aa1d9c9614f --- absl/log/internal/check_op.h | 66 +++++++++++++++++------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/absl/log/internal/check_op.h b/absl/log/internal/check_op.h index 725340282bd..17afdedae25 100644 --- a/absl/log/internal/check_op.h +++ b/absl/log/internal/check_op.h @@ -133,41 +133,39 @@ // string literal and abort without doing any streaming. 
We don't need to // strip the call to stringify the non-ok `Status` as long as we don't log it; // dropping the `Status`'s message text is out of scope. -#define ABSL_LOG_INTERNAL_CHECK_OK(val, val_text) \ - for (::std::pair \ - absl_log_internal_check_ok_goo; \ - absl_log_internal_check_ok_goo.first = \ - ::absl::log_internal::AsStatus(val), \ - absl_log_internal_check_ok_goo.second = \ - ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok()) \ - ? nullptr \ - : ::absl::status_internal::MakeCheckFailString( \ - absl_log_internal_check_ok_goo.first, \ - ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(val_text \ - " is OK")), \ - !ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok());) \ - ABSL_LOG_INTERNAL_CONDITION_FATAL(STATELESS, true) \ - ABSL_LOG_INTERNAL_CHECK(::absl::implicit_cast( \ - absl_log_internal_check_ok_goo.second)) \ +#define ABSL_LOG_INTERNAL_CHECK_OK(val, val_text) \ + for (::std::pair \ + absl_log_internal_check_ok_goo; \ + absl_log_internal_check_ok_goo.first = \ + ::absl::log_internal::AsStatus(val), \ + absl_log_internal_check_ok_goo.second = \ + ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok()) \ + ? "" /* Don't use nullptr, to keep the annotation happy */ \ + : ::absl::status_internal::MakeCheckFailString( \ + absl_log_internal_check_ok_goo.first, \ + ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(val_text \ + " is OK")), \ + !ABSL_PREDICT_TRUE(absl_log_internal_check_ok_goo.first->ok());) \ + ABSL_LOG_INTERNAL_CONDITION_FATAL(STATELESS, true) \ + ABSL_LOG_INTERNAL_CHECK(absl_log_internal_check_ok_goo.second) \ .InternalStream() -#define ABSL_LOG_INTERNAL_QCHECK_OK(val, val_text) \ - for (::std::pair \ - absl_log_internal_qcheck_ok_goo; \ - absl_log_internal_qcheck_ok_goo.first = \ - ::absl::log_internal::AsStatus(val), \ - absl_log_internal_qcheck_ok_goo.second = \ - ABSL_PREDICT_TRUE(absl_log_internal_qcheck_ok_goo.first->ok()) \ - ? 
nullptr \ - : ::absl::status_internal::MakeCheckFailString( \ - absl_log_internal_qcheck_ok_goo.first, \ - ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(val_text \ - " is OK")), \ - !ABSL_PREDICT_TRUE(absl_log_internal_qcheck_ok_goo.first->ok());) \ - ABSL_LOG_INTERNAL_CONDITION_QFATAL(STATELESS, true) \ - ABSL_LOG_INTERNAL_QCHECK(::absl::implicit_cast( \ - absl_log_internal_qcheck_ok_goo.second)) \ +#define ABSL_LOG_INTERNAL_QCHECK_OK(val, val_text) \ + for (::std::pair \ + absl_log_internal_qcheck_ok_goo; \ + absl_log_internal_qcheck_ok_goo.first = \ + ::absl::log_internal::AsStatus(val), \ + absl_log_internal_qcheck_ok_goo.second = \ + ABSL_PREDICT_TRUE(absl_log_internal_qcheck_ok_goo.first->ok()) \ + ? "" /* Don't use nullptr, to keep the annotation happy */ \ + : ::absl::status_internal::MakeCheckFailString( \ + absl_log_internal_qcheck_ok_goo.first, \ + ABSL_LOG_INTERNAL_STRIP_STRING_LITERAL(val_text \ + " is OK")), \ + !ABSL_PREDICT_TRUE(absl_log_internal_qcheck_ok_goo.first->ok());) \ + ABSL_LOG_INTERNAL_CONDITION_QFATAL(STATELESS, true) \ + ABSL_LOG_INTERNAL_QCHECK(absl_log_internal_qcheck_ok_goo.second) \ .InternalStream() namespace absl { From 169c953aab108b3e390d39877b8b05a78a8ee8cf Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Mon, 2 Jun 2025 10:16:25 -0700 Subject: [PATCH 062/107] absl::string_view: assert against (data() == nullptr && size() != 0) PiperOrigin-RevId: 766242956 Change-Id: I7ef72e4abffcf0b0f1ce37f40d8580b4f6be387c --- absl/strings/string_view.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index e382618bb93..eca5404ba8c 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -204,7 +204,9 @@ class ABSL_ATTRIBUTE_VIEW string_view { // Constructor of a `string_view` from a `const char*` and length. 
constexpr string_view(const char* absl_nullable data, size_type len) - : ptr_(data), length_(CheckLengthInternal(len)) {} + : ptr_(data), length_(CheckLengthInternal(len)) { + ABSL_ASSERT(data != nullptr || len == 0); + } constexpr string_view(const string_view&) noexcept = default; string_view& operator=(const string_view&) noexcept = default; From ccefe1eee03ed2040b517f84f4c8df0f2cb82219 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 3 Jun 2025 08:31:50 -0700 Subject: [PATCH 063/107] Lift restriction on using EBCO[1] for nested CompressedTuples. The current implementation of CompressedTuple explicitly disallows EBCO for cases where CompressedTuples are nested. This is because the implentation for a tuple with EBCO-compatible element T inherits from Storage, where I is the index of T in the tuple, and CompressedTuple> would inherit twice from Storage, leading to ambiguity. This CL lifts the restriction by tagging Storage with a tag unique to a concrete CompressedTuple type. In the above example, the storage classes for the two T's will be `Storage>` and `Storage>`, respectively. [1] https://en.cppreference.com/w/cpp/language/ebo.html PiperOrigin-RevId: 766677179 Change-Id: I4f0169fadc7466a5d799ae1168b2ed1f195b363e --- absl/container/internal/compressed_tuple.h | 52 ++++++++++--------- .../internal/compressed_tuple_test.cc | 8 +++ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/absl/container/internal/compressed_tuple.h b/absl/container/internal/compressed_tuple.h index 6db0468d99a..2dd8d6c323c 100644 --- a/absl/container/internal/compressed_tuple.h +++ b/absl/container/internal/compressed_tuple.h @@ -64,24 +64,24 @@ struct Elem, I> template using ElemT = typename Elem::type; -// We can't use EBCO on other CompressedTuples because that would mean that we -// derive from multiple Storage<> instantiations with the same I parameter, -// and potentially from multiple identical Storage<> instantiations. 
So anytime -// we use type inheritance rather than encapsulation, we mark -// CompressedTupleImpl, to make this easy to detect. -struct uses_inheritance {}; template constexpr bool ShouldUseBase() { return std::is_class::value && std::is_empty::value && - !std::is_final::value && - !std::is_base_of::value; + !std::is_final::value; } +// Tag type used to disambiguate Storage types for different CompresseedTuples. +// Without it, CompressedTuple> would inherit from +// Storage twice. +template +struct StorageTag; + // The storage class provides two specializations: // - For empty classes, it stores T as a base class. // - For everything else, it stores T as a member. -template ()> +// Tag should be set to StorageTag. +template ()> struct Storage { T value; constexpr Storage() = default; @@ -94,8 +94,8 @@ struct Storage { constexpr T&& get() && { return std::move(*this).value; } }; -template -struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC Storage : T { +template +struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC Storage : T { constexpr Storage() = default; template @@ -111,30 +111,35 @@ template struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTupleImpl; template -struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTupleImpl< - CompressedTuple, absl::index_sequence, ShouldAnyUseBase> +struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC + CompressedTupleImpl, absl::index_sequence, + ShouldAnyUseBase> // We use the dummy identity function through std::integral_constant to // convince MSVC of accepting and expanding I in that context. Without it // you would get: // error C3548: 'I': parameter pack cannot be used in this context - : uses_inheritance, - Storage::value>... { + : Storage::value, + StorageTag>... { constexpr CompressedTupleImpl() = default; template explicit constexpr CompressedTupleImpl(absl::in_place_t, Vs&&... args) - : Storage(absl::in_place, std::forward(args))... {} + : Storage>(absl::in_place, + std::forward(args))... 
{} friend CompressedTuple; }; template -struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTupleImpl< - CompressedTuple, absl::index_sequence, false> +struct ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC + CompressedTupleImpl, absl::index_sequence, + false> // We use the dummy identity function as above... - : Storage::value, false>... { + : Storage::value, StorageTag, + false>... { constexpr CompressedTupleImpl() = default; template explicit constexpr CompressedTupleImpl(absl::in_place_t, Vs&&... args) - : Storage(absl::in_place, std::forward(args))... {} + : Storage, false>(absl::in_place, + std::forward(args))... {} friend CompressedTuple; }; @@ -183,9 +188,7 @@ struct TupleItemsMoveConstructible // Helper class to perform the Empty Base Class Optimization. // Ts can contain classes and non-classes, empty or not. For the ones that // are empty classes, we perform the CompressedTuple. If all types in Ts are -// empty classes, then CompressedTuple is itself an empty class. (This -// does not apply when one or more of those empty classes is itself an empty -// CompressedTuple.) +// empty classes, then CompressedTuple is itself an empty class. // // To access the members, use member .get() function. 
// @@ -208,7 +211,8 @@ class ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTuple using ElemT = internal_compressed_tuple::ElemT; template - using StorageT = internal_compressed_tuple::Storage, I>; + using StorageT = internal_compressed_tuple::Storage< + ElemT, I, internal_compressed_tuple::StorageTag>; public: // There seems to be a bug in MSVC dealing in which using '=default' here will diff --git a/absl/container/internal/compressed_tuple_test.cc b/absl/container/internal/compressed_tuple_test.cc index 01b334e1fac..fd37eb53cd4 100644 --- a/absl/container/internal/compressed_tuple_test.cc +++ b/absl/container/internal/compressed_tuple_test.cc @@ -107,6 +107,14 @@ TEST(CompressedTupleTest, PointerToEmpty) { } } +TEST(CompressedTupleTest, NestedCompressedTuplePreservesEmptiness) { + using TupleType = CompressedTuple, CompressedTuple>>; + TupleType x; + EXPECT_EQ(x.get<0>().value(), CallType::kMutableRef); + EXPECT_EQ(x.get<1>().get<0>().value(), CallType::kMutableRef); + EXPECT_TRUE(std::is_empty_v); +} + TEST(CompressedTupleTest, OneMoveOnRValueConstructionTemp) { InstanceTracker tracker; CompressedTuple x1(CopyableMovableInstance(1)); From 9e51ba2d4ef0ee0ab8f6f0091937a0ba7ac15339 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 3 Jun 2025 09:53:46 -0700 Subject: [PATCH 064/107] Enable CompressedTupleTest.NestedEbo test case. 
PiperOrigin-RevId: 766707922 Change-Id: Ibb10a5e992665673e2bd918439429b79dbe78fbf --- absl/container/internal/compressed_tuple_test.cc | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/absl/container/internal/compressed_tuple_test.cc b/absl/container/internal/compressed_tuple_test.cc index fd37eb53cd4..662f944640b 100644 --- a/absl/container/internal/compressed_tuple_test.cc +++ b/absl/container/internal/compressed_tuple_test.cc @@ -107,14 +107,6 @@ TEST(CompressedTupleTest, PointerToEmpty) { } } -TEST(CompressedTupleTest, NestedCompressedTuplePreservesEmptiness) { - using TupleType = CompressedTuple, CompressedTuple>>; - TupleType x; - EXPECT_EQ(x.get<0>().value(), CallType::kMutableRef); - EXPECT_EQ(x.get<1>().get<0>().value(), CallType::kMutableRef); - EXPECT_TRUE(std::is_empty_v); -} - TEST(CompressedTupleTest, OneMoveOnRValueConstructionTemp) { InstanceTracker tracker; CompressedTuple x1(CopyableMovableInstance(1)); @@ -460,14 +452,15 @@ TEST(CompressedTupleTest, EmptyFinalClass) { } #endif -// TODO(b/214288561): enable this test. -TEST(CompressedTupleTest, DISABLED_NestedEbo) { +TEST(CompressedTupleTest, NestedEbo) { struct Empty1 {}; struct Empty2 {}; CompressedTuple, int> x; CompressedTuple y; - // Currently fails with sizeof(x) == 8, sizeof(y) == 4. 
EXPECT_EQ(sizeof(x), sizeof(y)); + + using NestedEmpty = CompressedTuple>; + EXPECT_TRUE(std::is_empty_v); } } // namespace From 669459108da9e467949699b2db904e77221a1d98 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Tue, 3 Jun 2025 11:56:42 -0700 Subject: [PATCH 065/107] Fix conditional constexpr in ToInt64{Nano|Micro|Milli}seconds under GCC7 and GCC8 using an else clause as a workaround PiperOrigin-RevId: 766761689 Change-Id: I703db7502af119d4a1d540291dad17fe9f937957 --- absl/time/time.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/absl/time/time.h b/absl/time/time.h index db17a4cd40b..53bca90d156 100644 --- a/absl/time/time.h +++ b/absl/time/time.h @@ -1869,8 +1869,9 @@ ABSL_ATTRIBUTE_CONST_FUNCTION constexpr int64_t ToInt64Nanoseconds(Duration d) { time_internal::GetRepHi(d) >> 33 == 0) { return (time_internal::GetRepHi(d) * 1000 * 1000 * 1000) + (time_internal::GetRepLo(d) / time_internal::kTicksPerNanosecond); + } else { + return d / Nanoseconds(1); } - return d / Nanoseconds(1); } ABSL_ATTRIBUTE_CONST_FUNCTION constexpr int64_t ToInt64Microseconds( @@ -1880,8 +1881,9 @@ ABSL_ATTRIBUTE_CONST_FUNCTION constexpr int64_t ToInt64Microseconds( return (time_internal::GetRepHi(d) * 1000 * 1000) + (time_internal::GetRepLo(d) / (time_internal::kTicksPerNanosecond * 1000)); + } else { + return d / Microseconds(1); } - return d / Microseconds(1); } ABSL_ATTRIBUTE_CONST_FUNCTION constexpr int64_t ToInt64Milliseconds( @@ -1891,8 +1893,9 @@ ABSL_ATTRIBUTE_CONST_FUNCTION constexpr int64_t ToInt64Milliseconds( return (time_internal::GetRepHi(d) * 1000) + (time_internal::GetRepLo(d) / (time_internal::kTicksPerNanosecond * 1000 * 1000)); + } else { + return d / Milliseconds(1); } - return d / Milliseconds(1); } ABSL_ATTRIBUTE_CONST_FUNCTION constexpr int64_t ToInt64Seconds(Duration d) { From e430bb9b0833137c4365fee39dfa930657b408fc Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 4 Jun 2025 09:29:31 -0700 Subject: [PATCH 066/107] Fix 
-Wundef warning PiperOrigin-RevId: 767177504 Change-Id: I92cb3ca37fc7bc663849d48d1f95a6f21f3aaf43 --- absl/hash/internal/hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index fa88227b36e..1564db9a4a5 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -39,7 +39,7 @@ // For feature testing and determining which headers can be included. #if ABSL_INTERNAL_CPLUSPLUS_LANG >= 202002L || \ - ABSL_INTERNAL_VERSION_HEADER_AVAILABLE + defined(ABSL_INTERNAL_VERSION_HEADER_AVAILABLE) #include #else #include From 34593c16e7a5a864c0b50fcd1d7010eecd5851db Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Wed, 4 Jun 2025 10:41:07 -0700 Subject: [PATCH 067/107] Replace WeakMix usage with Mix and change H2 to use the most significant 7 bits - saving 1 cycle in H1. Using Mix instead of WeakMix means that the entire 64 bits of hash are expected to have good entropy. Note that WeakMix only has lower latency than Mix on Arm, but not on x86. PiperOrigin-RevId: 767203392 Change-Id: Ib9655bc4db2e525ea14f348b86f680acf02dcb29 --- absl/container/internal/raw_hash_set.h | 9 +++++---- absl/hash/internal/hash.h | 20 ++++---------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index ac0ff4a0784..e9486c33e32 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -521,15 +521,16 @@ class HashtableSize { uint64_t data_; }; -// Extracts the H1 portion of a hash: 57 bits mixed with a per-table seed. +// Mixes the hash with a per-table seed. Note that we only use the low bits of +// H1 because we bitwise-and with capacity later. inline size_t H1(size_t hash, PerTableSeed seed) { - return (hash >> 7) ^ seed.seed(); + return hash ^ seed.seed(); } -// Extracts the H2 portion of a hash: the 7 bits not used for H1. 
+// Extracts the H2 portion of a hash: the 7 most significant bits. // // These are used as an occupied control byte. -inline h2_t H2(size_t hash) { return hash & 0x7F; } +inline h2_t H2(size_t hash) { return hash >> (sizeof(size_t) * 8 - 7); } // When there is an insertion with no reserved growth, we rehash with // probability `min(1, RehashProbabilityConstant() / capacity())`. Using a diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 1564db9a4a5..30aede24796 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -74,7 +74,6 @@ #include #include "absl/base/attributes.h" -#include "absl/base/internal/endian.h" #include "absl/base/internal/unaligned_access.h" #include "absl/base/optimization.h" #include "absl/base/port.h" @@ -504,8 +503,7 @@ std::enable_if_t::value, H> AbslHashValue(H hash_state, T ptr) { auto v = reinterpret_cast(ptr); // Due to alignment, pointers tend to have low bits as zero, and the next few - // bits follow a pattern since they are also multiples of some base value. The - // byte swap in WeakMix helps ensure we still have good entropy in low bits. + // bits follow a pattern since they are also multiples of some base value. // Mix pointers twice to ensure we have good entropy in low bits. return H::combine(std::move(hash_state), v, v); } @@ -1121,7 +1119,7 @@ class ABSL_DLL MixingHashState : public HashStateBase { template ::value, int> = 0> static size_t hash(T value) { return static_cast( - WeakMix(Seed(), static_cast>(value))); + Mix(Seed() ^ static_cast>(value), kMul)); } // Overload of MixingHashState::hash() @@ -1186,7 +1184,7 @@ class ABSL_DLL MixingHashState : public HashStateBase { // optimize Read1To3 and Read4To8 differently for the string case. 
static MixingHashState combine_raw(MixingHashState hash_state, uint64_t value) { - return MixingHashState(WeakMix(hash_state.state_, value)); + return MixingHashState(Mix(hash_state.state_ ^ value, kMul)); } // Implementation of the base case for combine_contiguous where we actually @@ -1214,7 +1212,7 @@ class ABSL_DLL MixingHashState : public HashStateBase { // Empty string must modify the state. v = 0x57; } - return WeakMix(state, v); + return Mix(state ^ v, kMul); } ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl9to16( @@ -1323,16 +1321,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { return Uint128High64(m) ^ Uint128Low64(m); } - // Slightly lower latency than Mix, but with lower quality. The byte swap - // helps ensure that low bits still have high quality. - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t WeakMix(uint64_t lhs, - uint64_t rhs) { - const uint64_t n = lhs ^ rhs; - // WeakMix doesn't work well on 32-bit platforms so just use Mix. - if constexpr (sizeof(size_t) < 8) return Mix(n, kMul); - return absl::gbswap_64(n * kMul); - } - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data, size_t len, uint64_t state) { From f8c9a088d66521aacde14357d05c6b2a69000b26 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 5 Jun 2025 10:08:54 -0700 Subject: [PATCH 068/107] Suppress CFI violation on VDSO call. PiperOrigin-RevId: 767658852 Change-Id: I097433844dd6f4db0c9c0fdc0b3f5dd0dcc5ee63 --- absl/debugging/internal/vdso_support.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/absl/debugging/internal/vdso_support.cc b/absl/debugging/internal/vdso_support.cc index 8a588eaffee..f7e2a443500 100644 --- a/absl/debugging/internal/vdso_support.cc +++ b/absl/debugging/internal/vdso_support.cc @@ -17,6 +17,7 @@ // VDSOSupport -- a class representing kernel VDSO (if present). 
#include "absl/debugging/internal/vdso_support.h" +#include "absl/base/attributes.h" #ifdef ABSL_HAVE_VDSO_SUPPORT // defined in vdso_support.h @@ -190,6 +191,9 @@ long VDSOSupport::InitAndGetCPU(unsigned *cpu, // NOLINT(runtime/int) // This function must be very fast, and may be called from very // low level (e.g. tcmalloc). Hence I avoid things like // GoogleOnceInit() and ::operator new. +// The destination in VDSO is unknown to CFI and VDSO does not set MSAN +// shadow for the return value. +ABSL_ATTRIBUTE_NO_SANITIZE_CFI ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY int GetCPU() { unsigned cpu; From daff1b1a9cd5cf32c7d5409b4844e32e92dde461 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Thu, 5 Jun 2025 15:20:43 -0700 Subject: [PATCH 069/107] Remove unnecessary modification of growth info in small table case. We still allocate and allow modifying GrowthInfo in some codepathes. In most of the hottest codepathes we know that we don't need to modify or read growth_info for small tables. It is still allowed to use `ResetGrowthLeft` and get access via `GetGrowthInfoFromControl`. That simplifies general function and avoids extra complexity. In case we decide to go all the way to allocate only a single slot that will need to be changed. 
PiperOrigin-RevId: 767785846 Change-Id: Ia66be1044f06ab43d36064edfc8007ea5ae3767e --- absl/container/internal/raw_hash_set.cc | 8 +------ absl/container/internal/raw_hash_set.h | 29 +++++++++++++------------ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index f33232054dc..daa9ff6a952 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -522,7 +522,6 @@ void EraseMetaOnly(CommonFields& c, const ctrl_t* ctrl, size_t slot_size) { if (c.is_small()) { SanitizerPoisonMemoryRegion(c.slot_array(), slot_size); - c.growth_info().OverwriteFullAsEmpty(); return; } @@ -1433,10 +1432,6 @@ std::pair PrepareInsertSmallNonSoo( static_assert(NextCapacity(0) == 1); PrepareInsertCommon(common); - // TODO(b/413062340): maybe don't allocate growth info for capacity 1 tables. - // Doing so may require additional branches/complexity so it might not be - // worth it. - GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(0); if (ABSL_PREDICT_FALSE(has_infoz)) { ReportSingleGroupTableGrowthToInfoz(common, infoz, get_hash()); @@ -1857,8 +1852,7 @@ void ReserveTableToFitNewSize(CommonFields& common, ABSL_SWISSTABLE_ASSERT(!common.empty() || cap > policy.soo_capacity()); ABSL_SWISSTABLE_ASSERT(cap > 0); const size_t max_size_before_growth = - cap <= policy.soo_capacity() ? policy.soo_capacity() - : common.size() + common.growth_left(); + IsSmallCapacity(cap) ? cap : common.size() + common.growth_left(); if (new_size <= max_size_before_growth) { return; } diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index e9486c33e32..822815114ea 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -806,6 +806,9 @@ constexpr size_t AlignUpTo(size_t offset, size_t align) { // Helper class for computing offsets and allocation size of hash set fields. 
class RawHashSetLayout { public: + // TODO(b/413062340): maybe don't allocate growth info for capacity 1 tables. + // Doing so may require additional branches/complexity so it might not be + // worth it. explicit RawHashSetLayout(size_t capacity, size_t slot_size, size_t slot_align, bool has_infoz) : control_offset_(ControlOffset(has_infoz)), @@ -965,12 +968,6 @@ class CommonFields : public CommonFieldsGenerationInfo { generate_new_seed(); } } - void* backing_array_start() const { - // growth_info (and maybe infoz) is stored before control bytes. - ABSL_SWISSTABLE_ASSERT( - reinterpret_cast(control()) % alignof(size_t) == 0); - return control() - ControlOffset(has_infoz()); - } // Note: we can't use slots() because Qt defines "slots" as a macro. void* slot_array() const { return heap_or_soo_.slot_array().get(); } @@ -1031,6 +1028,7 @@ class CommonFields : public CommonFieldsGenerationInfo { size_t growth_left() const { return growth_info().GetGrowthLeft(); } GrowthInfo& growth_info() { + ABSL_SWISSTABLE_ASSERT(!is_small()); return GetGrowthInfoFromControl(control()); } GrowthInfo growth_info() const { @@ -1040,14 +1038,21 @@ class CommonFields : public CommonFieldsGenerationInfo { bool has_infoz() const { return size_.has_infoz(); } void set_has_infoz() { size_.set_has_infoz(); } + HashtablezInfoHandle* infoz_ptr() const { + // growth_info is stored before control bytes. + ABSL_SWISSTABLE_ASSERT( + reinterpret_cast(control()) % alignof(size_t) == 0); + ABSL_SWISSTABLE_ASSERT(has_infoz()); + return reinterpret_cast( + control() - ControlOffset(/*has_infoz=*/true)); + } + HashtablezInfoHandle infoz() { - return has_infoz() - ? *reinterpret_cast(backing_array_start()) - : HashtablezInfoHandle(); + return has_infoz() ? 
*infoz_ptr() : HashtablezInfoHandle(); } void set_infoz(HashtablezInfoHandle infoz) { ABSL_SWISSTABLE_ASSERT(has_infoz()); - *reinterpret_cast(backing_array_start()) = infoz; + *infoz_ptr() = infoz; } bool should_rehash_for_bug_detection_on_insert() const { @@ -3430,16 +3435,13 @@ class raw_hash_set { // // See `CapacityToGrowth()`. size_t growth_left() const { - ABSL_SWISSTABLE_ASSERT(!is_soo()); return common().growth_left(); } GrowthInfo& growth_info() { - ABSL_SWISSTABLE_ASSERT(!is_soo()); return common().growth_info(); } GrowthInfo growth_info() const { - ABSL_SWISSTABLE_ASSERT(!is_soo()); return common().growth_info(); } @@ -3485,7 +3487,6 @@ class raw_hash_set { SooEnabled() ? common().set_empty_soo() : common().decrement_size(); if (!SooEnabled()) { SanitizerPoisonObject(single_slot()); - growth_info().OverwriteFullAsEmpty(); } } iterator single_iterator() { From 9c02e2cbe4174d4c410c3f6c20700f9975189c93 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Fri, 6 Jun 2025 12:11:16 -0700 Subject: [PATCH 070/107] Remove redundant comments that just name the following symbol without providing additional information. PiperOrigin-RevId: 768168977 Change-Id: Ic7fb411878e303a0749cd00e3ebf19d02b745546 --- absl/hash/internal/hash.h | 54 +++------------------------------------ 1 file changed, 4 insertions(+), 50 deletions(-) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 30aede24796..7825219db7a 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -104,8 +104,6 @@ namespace hash_internal { // returns the size of these chunks. constexpr size_t PiecewiseChunkSize() { return 1024; } -// PiecewiseCombiner -// // PiecewiseCombiner is an internal-only helper class for hashing a piecewise // buffer of `char` or `unsigned char` as though it were contiguous. 
This class // provides two methods: @@ -130,8 +128,6 @@ class PiecewiseCombiner { PiecewiseCombiner(const PiecewiseCombiner&) = delete; PiecewiseCombiner& operator=(const PiecewiseCombiner&) = delete; - // PiecewiseCombiner::add_buffer() - // // Appends the given range of bytes to the sequence to be hashed, which may // modify the provided hash state. template @@ -142,8 +138,6 @@ class PiecewiseCombiner { reinterpret_cast(data), size); } - // PiecewiseCombiner::finalize() - // // Finishes combining the hash sequence, which may may modify the provided // hash state. // @@ -160,18 +154,15 @@ class PiecewiseCombiner { bool added_something_ = false; }; -// is_hashable() -// // Trait class which returns true if T is hashable by the absl::Hash framework. // Used for the AbslHashValue implementations for composite types below. template struct is_hashable; -// HashStateBase -// -// An internal implementation detail that contains common implementation details -// for all of the "hash state objects" objects generated by Abseil. This is not -// a public API; users should not create classes that inherit from this. +// HashStateBase is an internal implementation detail that contains common +// implementation details for all of the "hash state objects" objects generated +// by Abseil. This is not a public API; users should not create classes that +// inherit from this. // // A hash state object is the template argument `H` passed to `AbslHashValue`. // It represents an intermediate state in the computation of an unspecified hash @@ -236,8 +227,6 @@ struct is_hashable; template class HashStateBase { public: - // HashStateBase::combine() - // // Combines an arbitrary number of values into a hash state, returning the // updated state. // @@ -257,8 +246,6 @@ class HashStateBase { static H combine(H state, const T& value, const Ts&... 
values); static H combine(H state) { return state; } - // HashStateBase::combine_contiguous() - // // Combines a contiguous array of `size` elements into a hash state, returning // the updated state. // @@ -298,8 +285,6 @@ class HashStateBase { }; }; -// is_uniquely_represented -// // `is_uniquely_represented` is a trait class that indicates whether `T` // is uniquely represented. // @@ -334,8 +319,6 @@ class HashStateBase { template struct is_uniquely_represented : std::false_type {}; -// is_uniquely_represented -// // unsigned char is a synonym for "byte", so it is guaranteed to be // uniquely represented. template <> @@ -350,9 +333,6 @@ struct is_uniquely_represented< Integral, typename std::enable_if::value>::type> : std::true_type {}; -// is_uniquely_represented -// -// template <> struct is_uniquely_represented : std::false_type {}; @@ -374,8 +354,6 @@ struct CombineRaw { } }; -// hash_bytes() -// // Convenience function that combines `hash_state` with the byte representation // of `value`. template & p) { return H::combine(std::move(hash_state), p.first, p.second); } -// hash_tuple() -// // Helper function for hashing a tuple. The third argument should // be an index_sequence running from 0 to tuple_size - 1. template @@ -881,7 +857,6 @@ typename std::enable_if::value, H>::type AbslHashValue( return H::combine(std::move(hash_state), opt.has_value()); } -// VariantVisitor template struct VariantVisitor { H&& hash_state; @@ -930,8 +905,6 @@ H AbslHashValue(H hash_state, const std::bitset& set) { // ----------------------------------------------------------------------------- -// hash_range_or_bytes() -// // Mixes all values in the range [data, data+size) into the hash state. // This overload accepts only uniquely-represented types, and hashes them by // hashing the entire range of bytes. 
@@ -942,7 +915,6 @@ hash_range_or_bytes(H hash_state, const T* data, size_t size) { return H::combine_contiguous(std::move(hash_state), bytes, sizeof(T) * size); } -// hash_range_or_bytes() template typename std::enable_if::value, H>::type hash_range_or_bytes(H hash_state, const T* data, size_t size) { @@ -975,8 +947,6 @@ inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) { #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 0 #endif -// HashSelect -// // Type trait to select the appropriate hash implementation to use. // HashSelect::type will give the proper hash implementation, to be invoked // as: @@ -1073,7 +1043,6 @@ template struct is_hashable : std::integral_constant::value> {}; -// MixingHashState class ABSL_DLL MixingHashState : public HashStateBase { // absl::uint128 is not an alias or a thin wrapper around the intrinsic. // We use the intrinsic when available to improve performance. @@ -1096,8 +1065,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { MixingHashState(MixingHashState&&) = default; MixingHashState& operator=(MixingHashState&&) = default; - // MixingHashState::combine_contiguous() - // // Fundamental base case for hash recursion: mixes the given range of bytes // into the hash state. static MixingHashState combine_contiguous(MixingHashState hash_state, @@ -1109,8 +1076,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { } using MixingHashState::HashStateBase::combine_contiguous; - // MixingHashState::hash() - // // For performance reasons in non-opt mode, we specialize this for // integral types. 
// Otherwise we would be instantiating and calling dozens of functions for @@ -1122,7 +1087,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { Mix(Seed() ^ static_cast>(value), kMul)); } - // Overload of MixingHashState::hash() template ::value, int> = 0> static size_t hash(const T& value) { return static_cast(combine(MixingHashState{}, value).state_); @@ -1332,8 +1296,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { #endif } - // Seed() - // // A non-deterministic seed. // // The current purpose of this seed is to generate non-deterministic results @@ -1364,7 +1326,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { uint64_t state_; }; -// MixingHashState::CombineContiguousImpl() inline uint64_t MixingHashState::CombineContiguousImpl( uint64_t state, const unsigned char* first, size_t len, std::integral_constant /* sizeof_size_t */) { @@ -1384,7 +1345,6 @@ inline uint64_t MixingHashState::CombineContiguousImpl( return CombineLargeContiguousImpl32(first, len, state); } -// Overload of MixingHashState::CombineContiguousImpl() inline uint64_t MixingHashState::CombineContiguousImpl( uint64_t state, const unsigned char* first, size_t len, std::integral_constant /* sizeof_size_t */) { @@ -1414,8 +1374,6 @@ inline uint64_t MixingHashState::CombineContiguousImpl( struct AggregateBarrier {}; -// HashImpl - // Add a private base class to make sure this type is not an aggregate. // Aggregates can be aggregate initialized even if the default constructor is // deleted. @@ -1444,14 +1402,12 @@ H HashStateBase::combine(H state, const T& value, const Ts&... 
values) { values...); } -// HashStateBase::combine_contiguous() template template H HashStateBase::combine_contiguous(H state, const T* data, size_t size) { return hash_internal::hash_range_or_bytes(std::move(state), data, size); } -// HashStateBase::combine_unordered() template template H HashStateBase::combine_unordered(H state, I begin, I end) { @@ -1459,7 +1415,6 @@ H HashStateBase::combine_unordered(H state, I begin, I end) { CombineUnorderedCallback{begin, end}); } -// HashStateBase::PiecewiseCombiner::add_buffer() template H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, size_t size) { @@ -1492,7 +1447,6 @@ H PiecewiseCombiner::add_buffer(H state, const unsigned char* data, return state; } -// HashStateBase::PiecewiseCombiner::finalize() template H PiecewiseCombiner::finalize(H state) { // Do not call combine_contiguous with empty remainder since it is modifying From 76fd1e96c71ad20fe08f1b7d18c6c55e197df85e Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 9 Jun 2025 14:27:06 -0700 Subject: [PATCH 071/107] Delete unused function ShouldSampleHashtablezInfoOnResize. PiperOrigin-RevId: 769294572 Change-Id: I40b3f5f856e05deed346474674d2be25c266261f --- absl/container/internal/raw_hash_set.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 822815114ea..7106bc81953 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1577,27 +1577,6 @@ constexpr bool ShouldSampleHashtablezInfoForAlloc() { return std::is_same_v>; } -template -bool ShouldSampleHashtablezInfoOnResize(bool force_sampling, - bool is_hashtablez_eligible, - size_t old_capacity, CommonFields& c) { - if (!is_hashtablez_eligible) return false; - // Force sampling is only allowed for SOO tables. 
- ABSL_SWISSTABLE_ASSERT(kSooEnabled || !force_sampling); - if (kSooEnabled && force_sampling) { - return true; - } - // In SOO, we sample on the first insertion so if this is an empty SOO case - // (e.g. when reserve is called), then we still need to sample. - if (kSooEnabled && old_capacity == SooCapacity() && c.empty()) { - return ShouldSampleNextTable(); - } - if (!kSooEnabled && old_capacity == 0) { - return ShouldSampleNextTable(); - } - return false; -} - // Allocates `n` bytes for a backing array. template ABSL_ATTRIBUTE_NOINLINE void* AllocateBackingArray(void* alloc, size_t n) { From 9ac131cf7da4de8b19b4c956a0c13edba74d92d9 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 10 Jun 2025 09:35:00 -0700 Subject: [PATCH 072/107] Reorder members of MixingHashState to comply with Google C++ style guide ordering of type declarations, static constants, ctors, non-ctor functions. PiperOrigin-RevId: 769667612 Change-Id: I1ebee3bedbb5fb408a7a213fc6076db7afad1ce5 --- absl/hash/internal/hash.h | 59 ++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 7825219db7a..5802064949f 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -1052,14 +1052,14 @@ class ABSL_DLL MixingHashState : public HashStateBase { using uint128 = absl::uint128; #endif // ABSL_HAVE_INTRINSIC_INT128 - static constexpr uint64_t kMul = - uint64_t{0xdcb22ca68cb134ed}; - template using IntegralFastPath = conjunction, is_uniquely_represented, FitsIn64Bits>; + static constexpr uint64_t kMul = + uint64_t{0xdcb22ca68cb134ed}; + public: // Move only MixingHashState(MixingHashState&&) = default; @@ -1093,14 +1093,37 @@ class ABSL_DLL MixingHashState : public HashStateBase { } private: - // Invoked only once for a given argument; that plus the fact that this is - // move-only ensures that there is only one non-moved-from object. 
- MixingHashState() : state_(Seed()) {} - friend class MixingHashState::HashStateBase; template friend H absl::hash_internal::hash_weakly_mixed_integer(H, WeaklyMixedInteger); + // Allow the HashState type-erasure implementation to invoke + // RunCombinedUnordered() directly. + friend class absl::HashState; + friend struct CombineRaw; + + // For use in Seed(). + static const void* const kSeed; + + // Invoked only once for a given argument; that plus the fact that this is + // move-only ensures that there is only one non-moved-from object. + MixingHashState() : state_(Seed()) {} + + // Workaround for MSVC bug. + // We make the type copyable to fix the calling convention, even though we + // never actually copy it. Keep it private to not affect the public API of the + // type. + MixingHashState(const MixingHashState&) = default; + + explicit MixingHashState(uint64_t state) : state_(state) {} + + // Combines a raw value from e.g. integrals/floats/pointers/etc. This allows + // us to be consistent with IntegralFastPath when combining raw types, but + // optimize Read1To3 and Read4To8 differently for the string case. + static MixingHashState combine_raw(MixingHashState hash_state, + uint64_t value) { + return MixingHashState(Mix(hash_state.state_ ^ value, kMul)); + } static MixingHashState combine_weakly_mixed_integer( MixingHashState hash_state, WeaklyMixedInteger value) { @@ -1130,27 +1153,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { return MixingHashState::combine(std::move(state), unordered_state); } - // Allow the HashState type-erasure implementation to invoke - // RunCombinedUnordered() directly. - friend class absl::HashState; - friend struct CombineRaw; - - // Workaround for MSVC bug. - // We make the type copyable to fix the calling convention, even though we - // never actually copy it. Keep it private to not affect the public API of the - // type. 
- MixingHashState(const MixingHashState&) = default; - - explicit MixingHashState(uint64_t state) : state_(state) {} - - // Combines a raw value from e.g. integrals/floats/pointers/etc. This allows - // us to be consistent with IntegralFastPath when combining raw types, but - // optimize Read1To3 and Read4To8 differently for the string case. - static MixingHashState combine_raw(MixingHashState hash_state, - uint64_t value) { - return MixingHashState(Mix(hash_state.state_ ^ value, kMul)); - } - // Implementation of the base case for combine_contiguous where we actually // mix the bytes into the state. // Dispatch to different implementations of the combine_contiguous depending @@ -1321,7 +1323,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { return static_cast(reinterpret_cast(kSeed)); #endif } - static const void* const kSeed; uint64_t state_; }; From f8288c18a12bbe158dcdee5cb9030efd109e7858 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 11 Jun 2025 14:20:51 -0700 Subject: [PATCH 073/107] rotr/rotl: Fix undefined behavior when passing INT_MIN as the number of positions to rotate by Previously the code was negating INT_MIN, which is undefined PiperOrigin-RevId: 770318129 Change-Id: Iff94b0e3d5777b2f488f2d48b6f8220f47bdada3 --- absl/numeric/bits_test.cc | 6 ++++++ absl/numeric/internal/bits.h | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/absl/numeric/bits_test.cc b/absl/numeric/bits_test.cc index 2977976d198..e2c64096658 100644 --- a/absl/numeric/bits_test.cc +++ b/absl/numeric/bits_test.cc @@ -151,6 +151,9 @@ TEST(Rotate, Left) { EXPECT_EQ(rotl(uint32_t{0x12345678UL}, -4), uint32_t{0x81234567UL}); EXPECT_EQ(rotl(uint64_t{0x12345678ABCDEF01ULL}, -4), uint64_t{0x112345678ABCDEF0ULL}); + + EXPECT_EQ(rotl(uint32_t{1234}, std::numeric_limits::min()), + uint32_t{1234}); } TEST(Rotate, Right) { @@ -190,6 +193,9 @@ TEST(Rotate, Right) { EXPECT_EQ(rotr(uint32_t{0x12345678UL}, -4), 
uint32_t{0x23456781UL}); EXPECT_EQ(rotr(uint64_t{0x12345678ABCDEF01ULL}, -4), uint64_t{0x2345678ABCDEF011ULL}); + + EXPECT_EQ(rotl(uint32_t{1234}, std::numeric_limits::min()), + uint32_t{1234}); } TEST(Rotate, Symmetry) { diff --git a/absl/numeric/internal/bits.h b/absl/numeric/internal/bits.h index e1d18b86334..e6815445ab7 100644 --- a/absl/numeric/internal/bits.h +++ b/absl/numeric/internal/bits.h @@ -77,8 +77,28 @@ template static_assert(IsPowerOf2(std::numeric_limits::digits), "T must have a power-of-2 size"); - return static_cast(x >> (s & (std::numeric_limits::digits - 1))) | - static_cast(x << ((-s) & (std::numeric_limits::digits - 1))); + // Rotate by s mod the number of digits to avoid unnecessary rotations. + // + // A negative s represents a left rotation instead of a right rotation. + // We compute it as an equivalent complementary right rotation by leveraging + // its two's complement representation. + // + // For example, suppose we rotate a 3-bit number by -2. + // In that case: + // * s = 0b11111111111111111111111111111110 + // * n = 8 + // * r = (0b11111111111111111111111111111110 & 0b111) = 0b110 + // + // Instead of rotating by 2 to the left, we rotate by 6 to the right, which + // is equivalent. + const int n = std::numeric_limits::digits; + const int r = s & (n - 1); + + if (r == 0) { + return x; + } else { + return (x >> r) | (x << (n - r)); + } } template @@ -88,8 +108,16 @@ template static_assert(IsPowerOf2(std::numeric_limits::digits), "T must have a power-of-2 size"); - return static_cast(x << (s & (std::numeric_limits::digits - 1))) | - static_cast(x >> ((-s) & (std::numeric_limits::digits - 1))); + // Rotate by s mod the number of digits to avoid unnecessary rotations. + // See comment in RotateRight for a detailed explanation of the logic below. 
+ const int n = std::numeric_limits::digits; + const int r = s & (n - 1); + + if (r == 0) { + return x; + } else { + return (x << r) | (x >> (n - r)); + } } ABSL_ATTRIBUTE_ALWAYS_INLINE ABSL_INTERNAL_CONSTEXPR_POPCOUNT inline int From 2ea5334068f11664a71d1d9dfb9a475482fa05f5 Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Thu, 12 Jun 2025 08:47:30 -0700 Subject: [PATCH 074/107] Refactor long strings hash computations and move `len <= PiecewiseChunkSize()` out of the line to keep only one function call in the inlined hash code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I am moving low level hash to hash.cc. It is needed to allow the compiler to inline low level hash and avoid two function calls. I also find it easier to follow. ``` name old CYCLES/op new CYCLES/op delta BM_latency_AbslHash_Int32 16.0 ± 3% 16.1 ± 3% ~ (p=0.285 n=151+150) BM_latency_AbslHash_Int64 16.4 ± 5% 16.5 ± 4% ~ (p=0.073 n=152+144) BM_latency_AbslHash_String3 21.1 ± 0% 21.1 ± 0% -0.07% (p=0.000 n=132+140) BM_latency_AbslHash_String5 21.1 ±11% 21.1 ±10% ~ (p=0.862 n=152+154) BM_latency_AbslHash_String9 21.0 ±13% 20.7 ±14% -1.27% (p=0.006 n=151+147) BM_latency_AbslHash_String17 19.9 ±11% 20.5 ±21% +3.27% (p=0.033 n=131+144) BM_latency_AbslHash_String33 21.1 ± 4% 21.2 ± 5% ~ (p=0.122 n=140+146) BM_latency_AbslHash_String65 26.9 ± 7% 27.2 ± 7% +1.06% (p=0.000 n=140+145) BM_latency_AbslHash_String257 44.4 ±10% 44.5 ±11% ~ (p=0.754 n=149+143) ``` PiperOrigin-RevId: 770672450 Change-Id: Ia6195c8a149cae89c8ca9013d4bf48052c09ba55 --- CMake/AbseilDll.cmake | 2 - absl/hash/BUILD.bazel | 20 +- absl/hash/CMakeLists.txt | 21 +- absl/hash/internal/hash.cc | 131 +++++++- absl/hash/internal/hash.h | 383 ++++++++++------------ absl/hash/internal/low_level_hash.cc | 106 ------ absl/hash/internal/low_level_hash.h | 57 ---- absl/hash/internal/low_level_hash_test.cc | 30 +- 8 files changed, 321 insertions(+), 429 deletions(-) delete mode 100644 
absl/hash/internal/low_level_hash.cc delete mode 100644 absl/hash/internal/low_level_hash.h diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index e10a6acfa31..9f88825e144 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -159,8 +159,6 @@ set(ABSL_INTERNAL_DLL_FILES "hash/internal/hash.h" "hash/internal/hash.cc" "hash/internal/spy_hash_state.h" - "hash/internal/low_level_hash.h" - "hash/internal/low_level_hash.cc" "hash/internal/weakly_mixed_integer.h" "log/absl_check.h" "log/absl_log.h" diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index 8176cd92a0f..04882271605 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -43,11 +43,11 @@ cc_library( linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":city", - ":low_level_hash", ":weakly_mixed_integer", "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", + "//absl/base:prefetch", "//absl/container:fixed_array", "//absl/functional:function_ref", "//absl/meta:type_traits", @@ -187,22 +187,6 @@ cc_test( ], ) -cc_library( - name = "low_level_hash", - srcs = ["internal/low_level_hash.cc"], - hdrs = ["internal/low_level_hash.h"], - copts = ABSL_DEFAULT_COPTS, - linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//visibility:private"], - deps = [ - "//absl/base:config", - "//absl/base:core_headers", - "//absl/base:endian", - "//absl/base:prefetch", - "//absl/numeric:int128", - ], -) - cc_library( name = "weakly_mixed_integer", hdrs = ["internal/weakly_mixed_integer.h"], @@ -225,7 +209,7 @@ cc_test( linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:private"], deps = [ - ":low_level_hash", + ":hash", "//absl/strings", "@googletest//:gtest", "@googletest//:gtest_main", diff --git a/absl/hash/CMakeLists.txt b/absl/hash/CMakeLists.txt index 6996d930e72..b439e4ce2ee 100644 --- a/absl/hash/CMakeLists.txt +++ b/absl/hash/CMakeLists.txt @@ -38,7 +38,6 @@ absl_cc_library( absl::optional absl::variant absl::utility - absl::low_level_hash absl::weakly_mixed_integer 
PUBLIC ) @@ -153,24 +152,6 @@ absl_cc_test( GTest::gmock_main ) -# Internal-only target, do not depend on directly. -absl_cc_library( - NAME - low_level_hash - HDRS - "internal/low_level_hash.h" - SRCS - "internal/low_level_hash.cc" - COPTS - ${ABSL_DEFAULT_COPTS} - DEPS - absl::config - absl::core_headers - absl::endian - absl::int128 - absl::prefetch -) - # Internal-only target, do not depend on directly. absl_cc_library( NAME @@ -191,7 +172,7 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS - absl::low_level_hash + absl::hash absl::strings GTest::gmock_main ) diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index 1b47e614d4a..87d2061c6a2 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -14,27 +14,102 @@ #include "absl/hash/internal/hash.h" +#include #include #include #include #include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/internal/unaligned_access.h" +#include "absl/base/optimization.h" +#include "absl/base/prefetch.h" #include "absl/hash/internal/city.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace hash_internal { -uint64_t MixingHashState::CombineLargeContiguousImpl32( - const unsigned char* first, size_t len, uint64_t state) { +namespace { + +uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) { + uint64_t a = absl::base_internal::UnalignedLoad64(ptr); + uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); + uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); + uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); + + uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state); + uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state); + return cs0 ^ cs1; +} + +[[maybe_unused]] uint64_t LowLevelHashLenGt32(const void* data, size_t len, + uint64_t seed) { + assert(len > 32); + const uint8_t* ptr = static_cast(data); + uint64_t current_state = seed ^ kStaticRandomData[0] ^ len; + const uint8_t* last_32_ptr = ptr + len - 32; + + if 
(len > 64) { + // If we have more than 64 bytes, we're going to handle chunks of 64 + // bytes at a time. We're going to build up four separate hash states + // which we will then hash together. This avoids short dependency chains. + uint64_t duplicated_state0 = current_state; + uint64_t duplicated_state1 = current_state; + uint64_t duplicated_state2 = current_state; + + do { + // Always prefetch the next cacheline. + PrefetchToLocalCache(ptr + ABSL_CACHELINE_SIZE); + + uint64_t a = absl::base_internal::UnalignedLoad64(ptr); + uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); + uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); + uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); + uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32); + uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40); + uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48); + uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56); + + current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state); + duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0); + + duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1); + duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2); + + ptr += 64; + len -= 64; + } while (len > 64); + + current_state = (current_state ^ duplicated_state0) ^ + (duplicated_state1 + duplicated_state2); + } + + // We now have a data `ptr` with at most 64 bytes and the current state + // of the hashing state machine stored in current_state. + if (len > 32) { + current_state = Mix32Bytes(ptr, current_state); + } + + // We now have a data `ptr` with at most 32 bytes and the current state + // of the hashing state machine stored in current_state. But we can + // safely read from `ptr + len - 32`. 
+ return Mix32Bytes(last_32_ptr, current_state); +} + +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit( + const unsigned char* data, size_t len, uint64_t state) { + // TODO(b/417141985): expose and use CityHash32WithSeed. + return Mix( + PrecombineLengthMix(state, len) ^ + hash_internal::CityHash32(reinterpret_cast(data), len), + kMul); +} + +ABSL_ATTRIBUTE_NOINLINE uint64_t +SplitAndCombineOn32Bit(const unsigned char* first, size_t len, uint64_t state) { while (len >= PiecewiseChunkSize()) { - // TODO(b/417141985): avoid code duplication with CombineContiguousImpl. - state = - Mix(PrecombineLengthMix(state, PiecewiseChunkSize()) ^ - hash_internal::CityHash32(reinterpret_cast(first), - PiecewiseChunkSize()), - kMul); + state = HashBlockOn32Bit(first, PiecewiseChunkSize(), state); len -= PiecewiseChunkSize(); first += PiecewiseChunkSize(); } @@ -48,10 +123,20 @@ uint64_t MixingHashState::CombineLargeContiguousImpl32( std::integral_constant{}); } -uint64_t MixingHashState::CombineLargeContiguousImpl64( - const unsigned char* first, size_t len, uint64_t state) { +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit( + const unsigned char* data, size_t len, uint64_t state) { +#ifdef ABSL_HAVE_INTRINSIC_INT128 + return LowLevelHashLenGt32(data, len, state); +#else + return hash_internal::CityHash64WithSeed(reinterpret_cast(data), + len, state); +#endif +} + +ABSL_ATTRIBUTE_NOINLINE uint64_t +SplitAndCombineOn64Bit(const unsigned char* first, size_t len, uint64_t state) { while (len >= PiecewiseChunkSize()) { - state = Hash64(first, PiecewiseChunkSize(), state); + state = HashBlockOn64Bit(first, PiecewiseChunkSize(), state); len -= PiecewiseChunkSize(); first += PiecewiseChunkSize(); } @@ -65,6 +150,30 @@ uint64_t MixingHashState::CombineLargeContiguousImpl64( std::integral_constant{}); } +} // namespace + +uint64_t CombineLargeContiguousImplOn32BitLengthGt8(const unsigned char* first, + size_t len, + uint64_t state) { + assert(len > 8); + 
assert(sizeof(size_t) == 4); // NOLINT(misc-static-assert) + if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { + return HashBlockOn32Bit(first, len, state); + } + return SplitAndCombineOn32Bit(first, len, state); +} + +uint64_t CombineLargeContiguousImplOn64BitLengthGt32(const unsigned char* first, + size_t len, + uint64_t state) { + assert(len > 32); + assert(sizeof(size_t) == 8); // NOLINT(misc-static-assert) + if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { + return HashBlockOn64Bit(first, len, state); + } + return SplitAndCombineOn64Bit(first, len, state); +} + ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed; } // namespace hash_internal diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 5802064949f..9eacccdd430 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -79,7 +79,6 @@ #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/hash/internal/city.h" -#include "absl/hash/internal/low_level_hash.h" #include "absl/hash/internal/weakly_mixed_integer.h" #include "absl/meta/type_traits.h" #include "absl/numeric/bits.h" @@ -940,6 +939,186 @@ inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) { return state + (uint64_t{len} << 24); } + inline constexpr uint64_t kMul = uint64_t{0xdcb22ca68cb134ed}; + +// Random data taken from the hexadecimal digits of Pi's fractional component. +// https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number +ABSL_CACHELINE_ALIGNED inline constexpr uint64_t kStaticRandomData[] = { + 0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0, + 0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, +}; + +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t Mix(uint64_t lhs, uint64_t rhs) { + // For 32 bit platforms we are trying to use all 64 lower bits. + if constexpr (sizeof(size_t) < 8) { + uint64_t m = lhs * rhs; + return m ^ (m >> 32); + } + // absl::uint128 is not an alias or a thin wrapper around the intrinsic. 
+ // We use the intrinsic when available to improve performance. + // TODO(b/399425325): Try to remove MulType since compiler seem to generate + // the same code with just absl::uint128. + // See https://gcc.godbolt.org/z/s3hGarraG for details. +#ifdef ABSL_HAVE_INTRINSIC_INT128 + using MulType = __uint128_t; +#else // ABSL_HAVE_INTRINSIC_INT128 + using MulType = absl::uint128; +#endif // ABSL_HAVE_INTRINSIC_INT128 + // Though the 128-bit product on AArch64 needs two instructions, it is + // still a good balance between speed and hash quality. + MulType m = lhs; + m *= rhs; + return Uint128High64(m) ^ Uint128Low64(m); +} + +// Reads 8 bytes from p. +inline uint64_t Read8(const unsigned char* p) { +// Suppress erroneous array bounds errors on GCC. +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + return absl::base_internal::UnalignedLoad64(p); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + +// Reads 9 to 16 bytes from p. +// The first 8 bytes are in .first, and the rest of the bytes are in .second +// along with duplicated bytes from .first if len<16. +inline std::pair Read9To16(const unsigned char* p, + size_t len) { + return {Read8(p), Read8(p + len - 8)}; +} + +// Reads 4 to 8 bytes from p. +// Bytes are permuted and some input bytes may be duplicated in output. +inline uint64_t Read4To8(const unsigned char* p, size_t len) { + // If `len < 8`, we duplicate bytes. We always put low memory at the end. + // E.g., on little endian platforms: + // `ABCD` will be read as `ABCDABCD`. + // `ABCDE` will be read as `BCDEABCD`. + // `ABCDEF` will be read as `CDEFABCD`. + // `ABCDEFG` will be read as `DEFGABCD`. + // `ABCDEFGH` will be read as `EFGHABCD`. + // We also do not care about endianness. On big-endian platforms, bytes will + // be permuted differently. We always shift low memory by 32, because that + // can be pipelined earlier. 
Reading high memory requires computing + // `p + len - 4`. + uint64_t most_significant = + static_cast(absl::base_internal::UnalignedLoad32(p)) << 32; + uint64_t least_significant = + absl::base_internal::UnalignedLoad32(p + len - 4); + return most_significant | least_significant; +} + +// Reads 1 to 3 bytes from p. Some input bytes may be duplicated in output. +inline uint32_t Read1To3(const unsigned char* p, size_t len) { + // The trick used by this implementation is to avoid branches. + // We always read three bytes by duplicating. + // E.g., + // `A` is read as `AAA`. + // `AB` is read as `ABB`. + // `ABC` is read as `ABC`. + // We always shift `p[0]` so that it can be pipelined better. + // Other bytes require extra computation to find indices. + uint32_t mem0 = (static_cast(p[0]) << 16) | p[len - 1]; + uint32_t mem1 = static_cast(p[len / 2]) << 8; + return mem0 | mem1; +} + +// Slow dispatch path for calls to CombineContiguousImpl with a size argument +// larger than inlined size. Has the same effect as calling +// CombineContiguousImpl() repeatedly with the chunk stride size. +uint64_t CombineLargeContiguousImplOn32BitLengthGt8(const unsigned char* first, + size_t len, uint64_t state); +uint64_t CombineLargeContiguousImplOn64BitLengthGt32(const unsigned char* first, + size_t len, + uint64_t state); + +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineSmallContiguousImpl( + uint64_t state, const unsigned char* first, size_t len) { + ABSL_ASSUME(len <= 8); + uint64_t v; + if (len >= 4) { + v = Read4To8(first, len); + } else if (len > 0) { + v = Read1To3(first, len); + } else { + // Empty string must modify the state. 
+ v = 0x57; + } + return Mix(state ^ v, kMul); +} + +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineContiguousImpl9to16( + uint64_t state, const unsigned char* first, size_t len) { + ABSL_ASSUME(len >= 9); + ABSL_ASSUME(len <= 16); + // Note: any time one half of the mix function becomes zero it will fail to + // incorporate any bits from the other half. However, there is exactly 1 in + // 2^64 values for each side that achieve this, and only when the size is + // exactly 16 -- for smaller sizes there is an overlapping byte that makes + // this impossible unless the seed is *also* incredibly unlucky. + auto p = Read9To16(first, len); + return Mix(state ^ p.first, kMul ^ p.second); +} + +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineContiguousImpl17to32( + uint64_t state, const unsigned char* first, size_t len) { + ABSL_ASSUME(len >= 17); + ABSL_ASSUME(len <= 32); + // Do two mixes of overlapping 16-byte ranges in parallel to minimize + // latency. + const uint64_t m0 = + Mix(Read8(first) ^ kStaticRandomData[1], Read8(first + 8) ^ state); + + const unsigned char* tail_16b_ptr = first + (len - 16); + const uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ kStaticRandomData[3], + Read8(tail_16b_ptr + 8) ^ state); + return m0 ^ m1; +} + +// Implementation of the base case for combine_contiguous where we actually +// mix the bytes into the state. +// Dispatch to different implementations of combine_contiguous depending +// on the value of `sizeof(size_t)`. +inline uint64_t CombineContiguousImpl( + uint64_t state, const unsigned char* first, size_t len, + std::integral_constant /* sizeof_size_t */) { + // For large values we use CityHash, for small ones we use custom low latency + // hash. 
+ if (len <= 8) { + return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first, + len); + } + return CombineLargeContiguousImplOn32BitLengthGt8(first, len, state); +} + +inline uint64_t CombineContiguousImpl( + uint64_t state, const unsigned char* first, size_t len, + std::integral_constant /* sizeof_size_t */) { + // For large values we use LowLevelHash or CityHash depending on the platform, + // for small ones we use custom low latency hash. + if (len <= 8) { + return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first, + len); + } + if (len <= 16) { + return CombineContiguousImpl9to16(PrecombineLengthMix(state, len), first, + len); + } + if (len <= 32) { + return CombineContiguousImpl17to32(PrecombineLengthMix(state, len), first, + len); + } + // We must not mix length into the state here because calling + // CombineContiguousImpl twice with PiecewiseChunkSize() must be equivalent + // to calling CombineLargeContiguousImpl once with 2 * PiecewiseChunkSize(). + return CombineLargeContiguousImplOn64BitLengthGt32(first, len, state); +} + #if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \ ABSL_META_INTERNAL_STD_HASH_SFINAE_FRIENDLY_ #define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1 @@ -1044,22 +1223,11 @@ struct is_hashable : std::integral_constant::value> {}; class ABSL_DLL MixingHashState : public HashStateBase { - // absl::uint128 is not an alias or a thin wrapper around the intrinsic. - // We use the intrinsic when available to improve performance. 
-#ifdef ABSL_HAVE_INTRINSIC_INT128 - using uint128 = __uint128_t; -#else // ABSL_HAVE_INTRINSIC_INT128 - using uint128 = absl::uint128; -#endif // ABSL_HAVE_INTRINSIC_INT128 - template using IntegralFastPath = conjunction, is_uniquely_represented, FitsIn64Bits>; - static constexpr uint64_t kMul = - uint64_t{0xdcb22ca68cb134ed}; - public: // Move only MixingHashState(MixingHashState&&) = default; @@ -1153,151 +1321,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { return MixingHashState::combine(std::move(state), unordered_state); } - // Implementation of the base case for combine_contiguous where we actually - // mix the bytes into the state. - // Dispatch to different implementations of the combine_contiguous depending - // on the value of `sizeof(size_t)`. - static uint64_t CombineContiguousImpl(uint64_t state, - const unsigned char* first, size_t len, - std::integral_constant - /* sizeof_size_t */); - static uint64_t CombineContiguousImpl(uint64_t state, - const unsigned char* first, size_t len, - std::integral_constant - /* sizeof_size_t */); - - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineSmallContiguousImpl( - uint64_t state, const unsigned char* first, size_t len) { - ABSL_ASSUME(len <= 8); - uint64_t v; - if (len >= 4) { - v = Read4To8(first, len); - } else if (len > 0) { - v = Read1To3(first, len); - } else { - // Empty string must modify the state. - v = 0x57; - } - return Mix(state ^ v, kMul); - } - - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl9to16( - uint64_t state, const unsigned char* first, size_t len) { - ABSL_ASSUME(len >= 9); - ABSL_ASSUME(len <= 16); - // Note: any time one half of the mix function becomes zero it will fail to - // incorporate any bits from the other half. 
However, there is exactly 1 in - // 2^64 values for each side that achieve this, and only when the size is - // exactly 16 -- for smaller sizes there is an overlapping byte that makes - // this impossible unless the seed is *also* incredibly unlucky. - auto p = Read9To16(first, len); - return Mix(state ^ p.first, kMul ^ p.second); - } - - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl17to32( - uint64_t state, const unsigned char* first, size_t len) { - ABSL_ASSUME(len >= 17); - ABSL_ASSUME(len <= 32); - // Do two mixes of overlapping 16-byte ranges in parallel to minimize - // latency. - const uint64_t m0 = - Mix(Read8(first) ^ kStaticRandomData[1], Read8(first + 8) ^ state); - - const unsigned char* tail_16b_ptr = first + (len - 16); - const uint64_t m1 = Mix(Read8(tail_16b_ptr) ^ kStaticRandomData[3], - Read8(tail_16b_ptr + 8) ^ state); - return m0 ^ m1; - } - - // Slow dispatch path for calls to CombineContiguousImpl with a size argument - // larger than PiecewiseChunkSize(). Has the same effect as calling - // CombineContiguousImpl() repeatedly with the chunk stride size. - static uint64_t CombineLargeContiguousImpl32(const unsigned char* first, - size_t len, uint64_t state); - static uint64_t CombineLargeContiguousImpl64(const unsigned char* first, - size_t len, uint64_t state); - - // Reads 9 to 16 bytes from p. - // The first 8 bytes are in .first, and the rest of the bytes are in .second - // along with duplicated bytes from .first if len<16. - static std::pair Read9To16(const unsigned char* p, - size_t len) { - return {Read8(p), Read8(p + len - 8)}; - } - - // Reads 8 bytes from p. - static uint64_t Read8(const unsigned char* p) { - // Suppress erroneous array bounds errors on GCC. 
-#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - return absl::base_internal::UnalignedLoad64(p); -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic pop -#endif - } - - // Reads 4 to 8 bytes from p. - // Bytes are permuted and some input bytes may be duplicated in output. - static uint64_t Read4To8(const unsigned char* p, size_t len) { - // If `len < 8`, we duplicate bytes. We always put low memory at the end. - // E.g., on little endian platforms: - // `ABCD` will be read as `ABCDABCD`. - // `ABCDE` will be read as `BCDEABCD`. - // `ABCDEF` will be read as `CDEFABCD`. - // `ABCDEFG` will be read as `DEFGABCD`. - // `ABCDEFGH` will be read as `EFGHABCD`. - // We also do not care about endianness. On big-endian platforms, bytes will - // be permuted differently. We always shift low memory by 32, because that - // can be pipelined earlier. Reading high memory requires computing - // `p + len - 4`. - uint64_t most_significant = - static_cast(absl::base_internal::UnalignedLoad32(p)) << 32; - uint64_t least_significant = - absl::base_internal::UnalignedLoad32(p + len - 4); - return most_significant | least_significant; - } - - // Reads 1 to 3 bytes from p. Some input bytes may be duplicated in output. - static uint32_t Read1To3(const unsigned char* p, size_t len) { - // The trick used by this implementation is to avoid branches. - // We always read three bytes by duplicating. - // E.g., - // `A` is read as `AAA`. - // `AB` is read as `ABB`. - // `ABC` is read as `ABC`. - // We always shift `p[0]` so that it can be pipelined better. - // Other bytes require extra computation to find indices. - uint32_t mem0 = (static_cast(p[0]) << 16) | p[len - 1]; - uint32_t mem1 = static_cast(p[len / 2]) << 8; - return mem0 | mem1; - } - - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t lhs, uint64_t rhs) { - // For 32 bit platforms we are trying to use all 64 lower bits. 
- if constexpr (sizeof(size_t) < 8) { - uint64_t m = lhs * rhs; - return m ^ (m >> 32); - } - // Though the 128-bit product on AArch64 needs two instructions, it is - // still a good balance between speed and hash quality. - uint128 m = lhs; - m *= rhs; - return Uint128High64(m) ^ Uint128Low64(m); - } - - ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data, - size_t len, - uint64_t state) { -#ifdef ABSL_HAVE_INTRINSIC_INT128 - return LowLevelHashLenGt32(data, len, state); -#else - return hash_internal::CityHash64WithSeed( - reinterpret_cast(data), len, state); -#endif - } - // A non-deterministic seed. // // The current purpose of this seed is to generate non-deterministic results @@ -1327,52 +1350,6 @@ class ABSL_DLL MixingHashState : public HashStateBase { uint64_t state_; }; -inline uint64_t MixingHashState::CombineContiguousImpl( - uint64_t state, const unsigned char* first, size_t len, - std::integral_constant /* sizeof_size_t */) { - // For large values we use CityHash, for small ones we use custom low latency - // hash. - if (len <= 8) { - return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first, - len); - } - if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { - // TODO(b/417141985): expose and use CityHash32WithSeed. - return Mix(PrecombineLengthMix(state, len) ^ - hash_internal::CityHash32( - reinterpret_cast(first), len), - kMul); - } - return CombineLargeContiguousImpl32(first, len, state); -} - -inline uint64_t MixingHashState::CombineContiguousImpl( - uint64_t state, const unsigned char* first, size_t len, - std::integral_constant /* sizeof_size_t */) { - // For large values we use LowLevelHash or CityHash depending on the platform, - // for small ones we use custom low latency hash. 
- if (len <= 8) { - return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first, - len); - } - if (len <= 16) { - return CombineContiguousImpl9to16(PrecombineLengthMix(state, len), first, - len); - } - if (len <= 32) { - return CombineContiguousImpl17to32(PrecombineLengthMix(state, len), first, - len); - } - if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) { - // Length is mixed into the state inside of Hash64. - return Hash64(first, len, state); - } - // We must not mix length to the state here because calling - // CombineContiguousImpl twice with PiecewiseChunkSize() must be equivalent - // to calling CombineLargeContiguousImpl once with 2 * PiecewiseChunkSize(). - return CombineLargeContiguousImpl64(first, len, state); -} - struct AggregateBarrier {}; // Add a private base class to make sure this type is not an aggregate. diff --git a/absl/hash/internal/low_level_hash.cc b/absl/hash/internal/low_level_hash.cc deleted file mode 100644 index 575cf745c5e..00000000000 --- a/absl/hash/internal/low_level_hash.cc +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2020 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "absl/hash/internal/low_level_hash.h" - -#include -#include -#include - -#include "absl/base/config.h" -#include "absl/base/internal/unaligned_access.h" -#include "absl/base/optimization.h" -#include "absl/base/prefetch.h" -#include "absl/numeric/int128.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace hash_internal { -namespace { - -uint64_t Mix(uint64_t v0, uint64_t v1) { - absl::uint128 p = v0; - p *= v1; - return absl::Uint128Low64(p) ^ absl::Uint128High64(p); -} - -uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) { - uint64_t a = absl::base_internal::UnalignedLoad64(ptr); - uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); - uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); - uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); - - uint64_t cs0 = Mix(a ^ kStaticRandomData[1], b ^ current_state); - uint64_t cs1 = Mix(c ^ kStaticRandomData[2], d ^ current_state); - return cs0 ^ cs1; -} - -} // namespace - -uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed) { - assert(len > 32); - const uint8_t* ptr = static_cast(data); - uint64_t current_state = seed ^ kStaticRandomData[0] ^ len; - const uint8_t* last_32_ptr = ptr + len - 32; - - if (len > 64) { - // If we have more than 64 bytes, we're going to handle chunks of 64 - // bytes at a time. We're going to build up four separate hash states - // which we will then hash together. This avoids short dependency chains. - uint64_t duplicated_state0 = current_state; - uint64_t duplicated_state1 = current_state; - uint64_t duplicated_state2 = current_state; - - do { - // Always prefetch the next cacheline. 
- PrefetchToLocalCache(ptr + ABSL_CACHELINE_SIZE); - - uint64_t a = absl::base_internal::UnalignedLoad64(ptr); - uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8); - uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16); - uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24); - uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32); - uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40); - uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48); - uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56); - - current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state); - duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0); - - duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1); - duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2); - - ptr += 64; - len -= 64; - } while (len > 64); - - current_state = (current_state ^ duplicated_state0) ^ - (duplicated_state1 + duplicated_state2); - } - - // We now have a data `ptr` with at most 64 bytes and the current state - // of the hashing state machine stored in current_state. - if (len > 32) { - current_state = Mix32Bytes(ptr, current_state); - } - - // We now have a data `ptr` with at most 32 bytes and the current state - // of the hashing state machine stored in current_state. But we can - // safely read from `ptr + len - 32`. - return Mix32Bytes(last_32_ptr, current_state); -} - -} // namespace hash_internal -ABSL_NAMESPACE_END -} // namespace absl diff --git a/absl/hash/internal/low_level_hash.h b/absl/hash/internal/low_level_hash.h deleted file mode 100644 index bb2821c8ca9..00000000000 --- a/absl/hash/internal/low_level_hash.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2020 The Abseil Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// This file provides the Google-internal implementation of LowLevelHash. -// -// LowLevelHash is a fast hash function for hash tables, the fastest we've -// currently (late 2020) found that passes the SMHasher tests. The algorithm -// relies on intrinsic 128-bit multiplication for speed. This is not meant to be -// secure - just fast. -// -// It is closely based on a version of wyhash, but does not maintain or -// guarantee future compatibility with it. - -#ifndef ABSL_HASH_INTERNAL_LOW_LEVEL_HASH_H_ -#define ABSL_HASH_INTERNAL_LOW_LEVEL_HASH_H_ - -#include -#include - -#include "absl/base/config.h" -#include "absl/base/optimization.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace hash_internal { - -// Random data taken from the hexadecimal digits of Pi's fractional component. -// https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number -ABSL_CACHELINE_ALIGNED static constexpr uint64_t kStaticRandomData[] = { - 0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0, - 0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, -}; - -// Hash function for a byte array. A 64-bit seed and a set of five 64-bit -// integers are hashed into the result. The length must be greater than 32. -// -// To allow all hashable types (including string_view and Span) to depend on -// this algorithm, we keep the API low-level, with as few dependencies as -// possible. 
-uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed); - -} // namespace hash_internal -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_HASH_INTERNAL_LOW_LEVEL_HASH_H_ diff --git a/absl/hash/internal/low_level_hash_test.cc b/absl/hash/internal/low_level_hash_test.cc index fcfa6ebfa02..9b7868c97b5 100644 --- a/absl/hash/internal/low_level_hash_test.cc +++ b/absl/hash/internal/low_level_hash_test.cc @@ -12,25 +12,26 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "absl/hash/internal/low_level_hash.h" - #include #include +#include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/hash/hash.h" #include "absl/strings/escaping.h" +#include "absl/strings/string_view.h" #define UPDATE_GOLDEN 0 namespace { TEST(LowLevelHashTest, VerifyGolden) { - constexpr size_t kNumGoldenOutputs = 94; + constexpr size_t kNumGoldenOutputs = 95; static struct { absl::string_view base64_data; uint64_t seed; - } cases[] = { + } cases[kNumGoldenOutputs] = { {"VprUGNH+5NnNRaORxgH/ySrZFQFDL+4VAodhfBNinmn8cg==", uint64_t{0x531858a40bfa7ea1}}, {"gc1xZaY+q0nPcUvOOnWnT3bqfmT/geth/f7Dm2e/DemMfk4=", @@ -357,9 +358,12 @@ TEST(LowLevelHashTest, VerifyGolden) { uint64_t{0xc9ae5c8759b4877a}}, }; -#if defined(ABSL_IS_BIG_ENDIAN) +#if defined(ABSL_IS_BIG_ENDIAN) || !defined(ABSL_HAVE_INTRINSIC_INT128) || \ + UINTPTR_MAX != UINT64_MAX constexpr uint64_t kGolden[kNumGoldenOutputs] = {}; - GTEST_SKIP() << "We only maintain golden data for little endian systems."; + GTEST_SKIP() + << "We only maintain golden data for little endian 64 bit systems with " + "128 bit intristics."; #else constexpr uint64_t kGolden[kNumGoldenOutputs] = { 0x669da02f8d009e0f, 0xceb19bf2255445cd, 0x0e746992d6d43a7c, @@ -393,18 +397,22 @@ TEST(LowLevelHashTest, VerifyGolden) { 0xb8116dd26cf6feec, 0x7a77a6e4ed0cf081, 0xb71eec2d5a184316, 0x6fa932f77b4da817, 0x795f79b33909b2c4, 0x1b8755ef6b5eb34e, 0x2255b72d7d6b2d79, 
0xf2bdafafa90bd50a, 0x442a578f02cb1fc8, - 0xc25aefe55ecf83db, + 0xc25aefe55ecf83db, 0x3114c056f9c5a676, }; #endif + auto hash_fn = [](absl::string_view s, uint64_t state) { + return absl::hash_internal::CombineLargeContiguousImplOn64BitLengthGt32( + reinterpret_cast(s.data()), s.size(), state); + }; + #if UPDATE_GOLDEN (void)kGolden; // Silence warning. for (size_t i = 0; i < kNumGoldenOutputs; ++i) { std::string str; ASSERT_TRUE(absl::Base64Unescape(cases[i].base64_data, &str)); ASSERT_GT(str.size(), 32); - uint64_t h = absl::hash_internal::LowLevelHashLenGt32( - str.data(), str.size(), cases[i].seed); + uint64_t h = hash_fn(str, cases[i].seed); printf("0x%016" PRIx64 ", ", h); if (i % 3 == 2) { printf("\n"); @@ -419,9 +427,7 @@ TEST(LowLevelHashTest, VerifyGolden) { std::string str; ASSERT_TRUE(absl::Base64Unescape(cases[i].base64_data, &str)); ASSERT_GT(str.size(), 32); - EXPECT_EQ(absl::hash_internal::LowLevelHashLenGt32(str.data(), str.size(), - cases[i].seed), - kGolden[i]); + EXPECT_EQ(hash_fn(str, cases[i].seed), kGolden[i]); } #endif } From 58e9ca14e6954548aea66c669ca98b255b5a7dfe Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 12 Jun 2025 09:04:19 -0700 Subject: [PATCH 075/107] Mark Voidify::operator&&() as no-inline. This improves stack trace for `LOG(FATAL)` with optimization on. 
PiperOrigin-RevId: 770679129 Change-Id: I2c7326725d5f1749165c04d3225833fdb22191d6 --- absl/log/internal/voidify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/absl/log/internal/voidify.h b/absl/log/internal/voidify.h index f42859eba14..1423e1c5764 100644 --- a/absl/log/internal/voidify.h +++ b/absl/log/internal/voidify.h @@ -36,7 +36,8 @@ class Voidify final { // This has to be an operator with a precedence lower than << but higher than // ?: template - ABSL_ATTRIBUTE_COLD void operator&&(T&& message) const&& { + ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void operator&&( + T&& message) const&& { // The dispatching of the completed `absl::LogEntry` to applicable // `absl::LogSink`s happens here. message.Flush(); From ca47784155e6351812f212c183ce4a8744f28f68 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 12 Jun 2025 14:43:31 -0700 Subject: [PATCH 076/107] Revert no-inline on Voidify::operator&&() -- caused unexpected binary size growth PiperOrigin-RevId: 770801572 Change-Id: Ia553cfcc5bfc2906d82990cec259d1ffdfbda2d4 --- absl/log/internal/voidify.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/absl/log/internal/voidify.h b/absl/log/internal/voidify.h index 1423e1c5764..f42859eba14 100644 --- a/absl/log/internal/voidify.h +++ b/absl/log/internal/voidify.h @@ -36,8 +36,7 @@ class Voidify final { // This has to be an operator with a precedence lower than << but higher than // ?: template - ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void operator&&( - T&& message) const&& { + ABSL_ATTRIBUTE_COLD void operator&&(T&& message) const&& { // The dispatching of the completed `absl::LogEntry` to applicable // `absl::LogSink`s happens here. message.Flush(); From 888c22115ff0280bf444b889e9a74a0c9ec37ac8 Mon Sep 17 00:00:00 2001 From: Ryan Buckley Date: Fri, 13 Jun 2025 09:54:28 -0700 Subject: [PATCH 077/107] Add support for scoped enumerations in CHECK_XX(). 
This change allows values of a scoped enum (that is, an `enum class` or `enum struct`) to be passed to CHECK_EQ and its relatives (CHECK_NE, DCHECK_GE, etc). Before this change, this was not possible because enum class values could not be printed without a custom operator<< or AbslStringify implementation. This change adds support by converting the scoped-enum values to their underlying types for printing purposes when a CHECK_XX fails. If a scoped enum already has operator<< or AbslStringify defined, those methods are still preferred. One design detail: enums can have char as their underlying type. For consistency with unscoped enums, a char-backed scoped enum is printed as an ASCII character when its value is in the printable ASCII range (decimal 32 to 126). PiperOrigin-RevId: 771129686 Change-Id: I4d0ba0f4e1dc1264df4ee8d0230d3dbd02581c9b --- absl/log/check_test_impl.inc | 275 ++++++++++++++++++++++++++++++++++- absl/log/internal/check_op.h | 36 ++++- 2 files changed, 304 insertions(+), 7 deletions(-) diff --git a/absl/log/check_test_impl.inc b/absl/log/check_test_impl.inc index 7a0000e1354..37226a364f3 100644 --- a/absl/log/check_test_impl.inc +++ b/absl/log/check_test_impl.inc @@ -22,6 +22,8 @@ #error ABSL_TEST_CHECK must be defined for these tests to work. #endif +#include +#include #include #include @@ -40,6 +42,7 @@ namespace absl_log_internal { using ::testing::AllOf; using ::testing::AnyOf; +using ::testing::ContainsRegex; using ::testing::HasSubstr; using ::testing::Not; @@ -638,9 +641,8 @@ TEST(CHECKDeathTest, TestPointerPrintedAsNumberDespiteAbslStringify) { EXPECT_DEATH( ABSL_TEST_CHECK_EQ(p, nullptr), AnyOf( - HasSubstr("Check failed: p == nullptr (0000000000001234 vs. (null))"), - HasSubstr("Check failed: p == nullptr (0x1234 vs. (null))") - )); + HasSubstr("Check failed: p == nullptr (0000000000001234 vs. (null))"), + HasSubstr("Check failed: p == nullptr (0x1234 vs. (null))"))); } // An uncopyable object with operator<<. 
@@ -670,6 +672,273 @@ TEST(CHECKDeathTest, TestUncopyable) { HasSubstr("Check failed: v1 == v2 (Uncopyable{1} vs. Uncopyable{2})")); } +enum class ScopedEnum { kValue1 = 1, kValue2 = 2 }; + +TEST(CHECKTest, TestScopedEnumComparisonChecks) { + ABSL_TEST_CHECK_EQ(ScopedEnum::kValue1, ScopedEnum::kValue1); + ABSL_TEST_CHECK_NE(ScopedEnum::kValue1, ScopedEnum::kValue2); + ABSL_TEST_CHECK_LT(ScopedEnum::kValue1, ScopedEnum::kValue2); + ABSL_TEST_CHECK_LE(ScopedEnum::kValue1, ScopedEnum::kValue2); + ABSL_TEST_CHECK_GT(ScopedEnum::kValue2, ScopedEnum::kValue1); + ABSL_TEST_CHECK_GE(ScopedEnum::kValue2, ScopedEnum::kValue2); + ABSL_TEST_DCHECK_EQ(ScopedEnum::kValue1, ScopedEnum::kValue1); + ABSL_TEST_DCHECK_NE(ScopedEnum::kValue1, ScopedEnum::kValue2); + ABSL_TEST_DCHECK_LT(ScopedEnum::kValue1, ScopedEnum::kValue2); + ABSL_TEST_DCHECK_LE(ScopedEnum::kValue1, ScopedEnum::kValue2); + ABSL_TEST_DCHECK_GT(ScopedEnum::kValue2, ScopedEnum::kValue1); + ABSL_TEST_DCHECK_GE(ScopedEnum::kValue2, ScopedEnum::kValue2); + + // Check that overloads work correctly with references as well. + const ScopedEnum x = ScopedEnum::kValue1; + const ScopedEnum& x_ref = x; + ABSL_TEST_CHECK_EQ(x, x_ref); + ABSL_TEST_CHECK_EQ(x_ref, x_ref); +} + +#if GTEST_HAS_DEATH_TEST +TEST(CHECKDeathTest, TestScopedEnumCheckFailureMessagePrintsIntegerValues) { + const auto e1 = ScopedEnum::kValue1; + const auto e2 = ScopedEnum::kValue2; + EXPECT_DEATH(ABSL_TEST_CHECK_EQ(e1, e2), + ContainsRegex(R"re(Check failed:.*\(1 vs. 2\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_NE(e1, e1), + ContainsRegex(R"re(Check failed:.*\(1 vs. 1\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_GT(e1, e1), + ContainsRegex(R"re(Check failed:.*\(1 vs. 1\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_GE(e1, e2), + ContainsRegex(R"re(Check failed:.*\(1 vs. 2\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LT(e2, e2), + ContainsRegex(R"re(Check failed:.*\(2 vs. 2\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LE(e2, e1), + ContainsRegex(R"re(Check failed:.*\(2 vs. 
1\))re")); + + const auto& e1_ref = e1; + EXPECT_DEATH(ABSL_TEST_CHECK_NE(e1_ref, e1), + ContainsRegex(R"re(Check failed:.*\(1 vs. 1\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_NE(e1_ref, e1_ref), + ContainsRegex(R"re(Check failed:.*\(1 vs. 1\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_EQ(e2, e1_ref), + ContainsRegex(R"re(Check failed:.*\(2 vs. 1\))re")); + +#ifndef NDEBUG + EXPECT_DEATH(ABSL_TEST_DCHECK_EQ(e2, e1), + ContainsRegex(R"re(Check failed:.*\(2 vs. 1\))re")); +#else + // DCHECK_EQ is not evaluated in non-debug mode. + ABSL_TEST_DCHECK_EQ(e2, e1); +#endif // NDEBUG +} +#endif // GTEST_HAS_DEATH_TEST + +enum class ScopedInt8Enum : int8_t { + kValue1 = 1, + kValue2 = 66 // Printable ASCII value 'B'. +}; + +TEST(CHECKDeathTest, TestScopedInt8EnumCheckFailureMessagePrintsCharValues) { + const auto e1 = ScopedInt8Enum::kValue1; + const auto e2 = ScopedInt8Enum::kValue2; + EXPECT_DEATH( + ABSL_TEST_CHECK_EQ(e1, e2), + ContainsRegex(R"re(Check failed:.*\(signed char value 1 vs. 'B'\))re")); + EXPECT_DEATH( + ABSL_TEST_CHECK_NE(e1, e1), + ContainsRegex( + R"re(Check failed:.*\(signed char value 1 vs. signed char value 1\))re")); + EXPECT_DEATH( + ABSL_TEST_CHECK_GT(e1, e1), + ContainsRegex( + R"re(Check failed:.*\(signed char value 1 vs. signed char value 1\))re")); + EXPECT_DEATH( + ABSL_TEST_CHECK_GE(e1, e2), + ContainsRegex(R"re(Check failed:.*\(signed char value 1 vs. 'B'\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LT(e2, e2), + ContainsRegex(R"re(Check failed:.*\('B' vs. 'B'\))re")); + EXPECT_DEATH( + ABSL_TEST_CHECK_LE(e2, e1), + ContainsRegex(R"re(Check failed:.*\('B' vs. 
signed char value 1\))re")); +} + +enum class ScopedUnsignedEnum : uint16_t { + kValue1 = std::numeric_limits::min(), + kValue2 = std::numeric_limits::max() +}; + +TEST(CHECKDeathTest, + TestScopedUnsignedEnumCheckFailureMessagePrintsCorrectValues) { + const auto e1 = ScopedUnsignedEnum::kValue1; + const auto e2 = ScopedUnsignedEnum::kValue2; + EXPECT_DEATH(ABSL_TEST_CHECK_EQ(e1, e2), + ContainsRegex(R"re(Check failed:.*\(0 vs. 65535\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_NE(e1, e1), + ContainsRegex(R"re(Check failed:.*\(0 vs. 0\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_GT(e1, e1), + ContainsRegex(R"re(Check failed:.*\(0 vs. 0\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_GE(e1, e2), + ContainsRegex(R"re(Check failed:.*\(0 vs. 65535\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LT(e1, e1), + ContainsRegex(R"re(Check failed:.*\(0 vs. 0\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LE(e2, e1), + ContainsRegex(R"re(Check failed:.*\(65535 vs. 0\))re")); +} + +enum class ScopedInt64Enum : int64_t { + kMin = std::numeric_limits::min(), + kMax = std::numeric_limits::max(), +}; + +// Tests that int64-backed enums are printed correctly even for very large and +// very small values. +TEST(CHECKDeathTest, TestScopedInt64EnumCheckFailureMessage) { + const auto min = ScopedInt64Enum::kMin; + const auto max = ScopedInt64Enum::kMax; + EXPECT_DEATH( + ABSL_TEST_CHECK_EQ(max, min), + ContainsRegex( + "Check failed:.*9223372036854775807 vs. -9223372036854775808")); + EXPECT_DEATH( + ABSL_TEST_CHECK_NE(max, max), + ContainsRegex( + "Check failed:.*9223372036854775807 vs. 9223372036854775807")); + EXPECT_DEATH( + ABSL_TEST_CHECK_GT(min, min), + ContainsRegex( + "Check failed:.*-9223372036854775808 vs. -9223372036854775808")); + EXPECT_DEATH( + ABSL_TEST_CHECK_GE(min, max), + ContainsRegex( + R"(Check failed:.*-9223372036854775808 vs. 9223372036854775807)")); + EXPECT_DEATH( + ABSL_TEST_CHECK_LT(max, max), + ContainsRegex( + R"(Check failed:.*9223372036854775807 vs. 
9223372036854775807)")); + EXPECT_DEATH( + ABSL_TEST_CHECK_LE(max, min), + ContainsRegex( + R"(Check failed:.*9223372036854775807 vs. -9223372036854775808)")); +} + +enum class ScopedBoolEnum : bool { + kFalse, + kTrue, +}; + +TEST(CHECKDeathTest, TestScopedBoolEnumCheckFailureMessagePrintsCorrectValues) { + const auto t = ScopedBoolEnum::kTrue; + const auto f = ScopedBoolEnum::kFalse; + EXPECT_DEATH(ABSL_TEST_CHECK_EQ(t, f), + ContainsRegex(R"re(Check failed:.*\(1 vs. 0\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_NE(f, f), + ContainsRegex(R"re(Check failed:.*\(0 vs. 0\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_GT(f, f), + ContainsRegex(R"re(Check failed:.*\(0 vs. 0\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_GE(f, t), + ContainsRegex(R"re(Check failed:.*\(0 vs. 1\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LT(t, t), + ContainsRegex(R"re(Check failed:.*\(1 vs. 1\))re")); + EXPECT_DEATH(ABSL_TEST_CHECK_LE(t, f), + ContainsRegex(R"re(Check failed:.*\(1 vs. 0\))re")); +} + +enum class ScopedEnumWithAbslStringify { + kValue1 = 1, + kValue2 = 2, + kValue3 = 3 +}; + +template +void AbslStringify(Sink& sink, ScopedEnumWithAbslStringify v) { + switch (v) { + case ScopedEnumWithAbslStringify::kValue1: + sink.Append("AbslStringify: kValue1"); + break; + case ScopedEnumWithAbslStringify::kValue2: + sink.Append("AbslStringify: kValue2"); + break; + case ScopedEnumWithAbslStringify::kValue3: + sink.Append("AbslStringify: kValue3"); + break; + } +} + +#if GTEST_HAS_DEATH_TEST +TEST(CHECKDeathTest, TestScopedEnumUsesAbslStringify) { + EXPECT_DEATH(ABSL_TEST_CHECK_EQ(ScopedEnumWithAbslStringify::kValue1, + ScopedEnumWithAbslStringify::kValue2), + ContainsRegex("Check failed:.*AbslStringify: kValue1 vs. 
" + "AbslStringify: kValue2")); +} +#endif // GTEST_HAS_DEATH_TEST + +enum class ScopedEnumWithOutputOperator { + kValue1 = 1, + kValue2 = 2, +}; + +std::ostream& operator<<(std::ostream& os, ScopedEnumWithOutputOperator v) { + switch (v) { + case ScopedEnumWithOutputOperator::kValue1: + os << "OutputOperator: kValue1"; + break; + case ScopedEnumWithOutputOperator::kValue2: + os << "OutputOperator: kValue2"; + break; + } + return os; +} + +#if GTEST_HAS_DEATH_TEST +TEST(CHECKDeathTest, TestOutputOperatorIsUsedForScopedEnum) { + EXPECT_DEATH(ABSL_TEST_CHECK_EQ(ScopedEnumWithOutputOperator::kValue1, + ScopedEnumWithOutputOperator::kValue2), + ContainsRegex("Check failed:.*OutputOperator: kValue1 vs. " + "OutputOperator: kValue2")); +} +#endif // GTEST_HAS_DEATH_TEST + +enum class ScopedEnumWithAbslStringifyAndOutputOperator { + kValue1 = 1, + kValue2 = 2, +}; + +template +void AbslStringify(Sink& sink, ScopedEnumWithAbslStringifyAndOutputOperator v) { + switch (v) { + case ScopedEnumWithAbslStringifyAndOutputOperator::kValue1: + sink.Append("AbslStringify: kValue1"); + break; + case ScopedEnumWithAbslStringifyAndOutputOperator::kValue2: + sink.Append("AbslStringify: kValue2"); + break; + } +} + +std::ostream& operator<<(std::ostream& os, + ScopedEnumWithAbslStringifyAndOutputOperator v) { + switch (v) { + case ScopedEnumWithAbslStringifyAndOutputOperator::kValue1: + os << "OutputOperator: kValue1"; + break; + case ScopedEnumWithAbslStringifyAndOutputOperator::kValue2: + os << "OutputOperator: kValue2"; + break; + } + return os; +} + +#if GTEST_HAS_DEATH_TEST + +// Test that, if operator<< and AbslStringify are both defined for a scoped +// enum, streaming takes precedence over AbslStringify. 
+TEST(CHECKDeathTest, TestScopedEnumPrefersOutputOperatorOverAbslStringify) { + EXPECT_DEATH( + ABSL_TEST_CHECK_EQ(ScopedEnumWithAbslStringifyAndOutputOperator::kValue1, + ScopedEnumWithAbslStringifyAndOutputOperator::kValue2), + ContainsRegex("Check failed:.*OutputOperator: kValue1 vs. " + "OutputOperator: kValue2")); +} +#endif // GTEST_HAS_DEATH_TEST + } // namespace absl_log_internal // NOLINTEND(misc-definitions-in-headers) diff --git a/absl/log/internal/check_op.h b/absl/log/internal/check_op.h index 17afdedae25..d7b55f6f1ae 100644 --- a/absl/log/internal/check_op.h +++ b/absl/log/internal/check_op.h @@ -298,12 +298,11 @@ const T& Detect(int); // This overload triggers when the call is ambiguous. // It means that T is either one from this list or printed as one from this -// list. Eg an enum that decays to `int` for printing. +// list. Eg an unscoped enum that decays to `int` for printing. // We ask the overload set to give us the type we want to convert it to. template -decltype(detect_specialization::operator<<(std::declval(), - std::declval())) -Detect(char); +decltype(detect_specialization::operator<<( + std::declval(), std::declval())) Detect(char); // A sink for AbslStringify which redirects everything to a std::ostream. class StringifySink { @@ -344,6 +343,35 @@ template std::enable_if_t::value, StringifyToStreamWrapper> Detect(...); // Ellipsis has lowest preference when int passed. + +// is_streamable is true for types that have an output stream operator<<. +template +struct is_streamable : std::false_type {}; + +template +struct is_streamable() + << std::declval())>> + : std::true_type {}; + +// This overload triggers when T is a scoped enum that has not defined an output +// stream operator (operator<<) or AbslStringify. It causes the enum value to be +// converted to a type that can be streamed. 
For consistency with other enums, a +// scoped enum backed by a bool or char is converted to its underlying type, and +// one backed by another integer is converted to (u)int64_t. +template +std::enable_if_t< + std::conjunction_v< + std::is_enum, std::negation>, + std::negation>, std::negation>>, + std::conditional_t< + std::is_same_v, bool> || + std::is_same_v, char> || + std::is_same_v, signed char> || + std::is_same_v, unsigned char>, + std::underlying_type_t, + std::conditional_t>, int64_t, + uint64_t>>> +Detect(...); } // namespace detect_specialization template From 52227846bbc04c5fc112a0e724c4b340828d50dd Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 16 Jun 2025 10:18:49 -0700 Subject: [PATCH 078/107] Pass swisstable seed as seed to absl::Hash so we can save an XOR in H1. This reverts the optimization in Copy() of not hashing keys for single-group tables because we can no longer depend on H2 being constant for different tables of the same type. Note that I also tried a version in which we don't change PerTableSeed to use sign-extended loads, but that version had worse performance on loadtests - presumably because the high bits of the seed are all 0. The intuition here is that if the high bits of the seed are all 0, then the high bits of the 128-bit product of the first Mix can also be all 0, which degrades the quality of the state after the first Mix. 
PiperOrigin-RevId: 772077751 Change-Id: I5193d90209f4a0c7b41e36a484adb989311aed65 --- absl/container/BUILD.bazel | 2 + absl/container/CMakeLists.txt | 2 + absl/container/flat_hash_map.h | 4 +- absl/container/flat_hash_set.h | 4 +- absl/container/flat_hash_set_test.cc | 14 ++ absl/container/internal/container_memory.h | 40 +++- .../internal/container_memory_test.cc | 34 ++- .../internal/hash_function_defaults.h | 12 + absl/container/internal/hash_policy_traits.h | 18 +- .../internal/hash_policy_traits_test.cc | 23 +- absl/container/internal/raw_hash_set.cc | 213 +++++++++--------- absl/container/internal/raw_hash_set.h | 195 +++++++++------- .../internal/raw_hash_set_allocator_test.cc | 2 +- .../internal/raw_hash_set_benchmark.cc | 4 +- .../internal/raw_hash_set_probe_benchmark.cc | 2 +- absl/container/internal/raw_hash_set_test.cc | 22 +- absl/container/node_hash_map.h | 4 +- absl/container/node_hash_set.h | 4 +- absl/hash/internal/hash.h | 29 ++- 19 files changed, 386 insertions(+), 242 deletions(-) diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel index 9a79523761c..66587729b5e 100644 --- a/absl/container/BUILD.bazel +++ b/absl/container/BUILD.bazel @@ -410,6 +410,7 @@ cc_library( linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ "//absl/base:config", + "//absl/hash", "//absl/memory", "//absl/meta:type_traits", "//absl/utility", @@ -749,6 +750,7 @@ cc_library( ":hashtable_debug_hooks", ":hashtablez_sampler", ":raw_hash_set_resize_impl", + "//absl/base", "//absl/base:config", "//absl/base:core_headers", "//absl/base:dynamic_annotations", diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt index b1c3ffac750..6adba18c383 100644 --- a/absl/container/CMakeLists.txt +++ b/absl/container/CMakeLists.txt @@ -469,6 +469,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS absl::config + absl::hash absl::memory absl::type_traits absl::utility @@ -786,6 +787,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::base absl::bits 
absl::common_policy_traits absl::compressed_tuple diff --git a/absl/container/flat_hash_map.h b/absl/container/flat_hash_map.h index 5fa502328d2..a1f4f24aae0 100644 --- a/absl/container/flat_hash_map.h +++ b/absl/container/flat_hash_map.h @@ -660,10 +660,10 @@ struct FlatHashMapPolicy { std::forward(args)...); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return memory_internal::IsLayoutCompatible::value - ? &TypeErasedApplyToSlotFn + ? &TypeErasedApplyToSlotFn : nullptr; } diff --git a/absl/container/flat_hash_set.h b/absl/container/flat_hash_set.h index bc1ceb17e63..2d255529ba1 100644 --- a/absl/container/flat_hash_set.h +++ b/absl/container/flat_hash_set.h @@ -558,9 +558,9 @@ struct FlatHashSetPolicy { static size_t space_used(const T*) { return 0; } - template + template static constexpr HashSlotFn get_hash_slot_fn() { - return &TypeErasedApplyToSlotFn; + return &TypeErasedApplyToSlotFn; } }; } // namespace container_internal diff --git a/absl/container/flat_hash_set_test.cc b/absl/container/flat_hash_set_test.cc index bb90efa2a09..ca069b402ad 100644 --- a/absl/container/flat_hash_set_test.cc +++ b/absl/container/flat_hash_set_test.cc @@ -383,6 +383,20 @@ TEST(FlatHashSet, MoveOnlyKey) { EXPECT_THAT(s, UnorderedElementsAre(1, 2, 3)); } +TEST(FlatHashSet, IsDefaultHash) { + using absl::container_internal::hashtable_debug_internal:: + HashtableDebugAccess; + EXPECT_EQ(HashtableDebugAccess>::kIsDefaultHash, true); + EXPECT_EQ(HashtableDebugAccess>::kIsDefaultHash, + true); + + struct Hash { + size_t operator()(size_t i) const { return i; } + }; + EXPECT_EQ((HashtableDebugAccess>::kIsDefaultHash), + false); +} + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/absl/container/internal/container_memory.h b/absl/container/internal/container_memory.h index ed7b90b169a..8c974698b11 100644 --- a/absl/container/internal/container_memory.h +++ b/absl/container/internal/container_memory.h @@ -26,6 +26,7 @@ #include 
#include "absl/base/config.h" +#include "absl/hash/hash.h" #include "absl/memory/memory.h" #include "absl/meta/type_traits.h" #include "absl/utility/utility.h" @@ -483,19 +484,33 @@ struct map_slot_policy { // Variadic arguments hash function that ignore the rest of the arguments. // Useful for usage with policy traits. -template +template struct HashElement { + HashElement(const Hash& h, size_t s) : hash(h), seed(s) {} + template size_t operator()(const K& key, Args&&...) const { - return h(key); + if constexpr (kIsDefault) { + // TODO(b/384509507): resolve `no header providing + // "absl::hash_internal::SupportsHashWithSeed" is directly included`. + // Maybe we should make "internal/hash.h" be a separate library. + return absl::hash_internal::HashWithSeed().hash(hash, key, seed); + } + // NOLINTNEXTLINE(clang-diagnostic-sign-conversion) + return hash(key) ^ seed; } - const Hash& h; + const Hash& hash; + size_t seed; }; // No arguments function hash function for a specific key. -template +template struct HashKey { - size_t operator()() const { return HashElement{hash}(key); } + HashKey(const Hash& h, const Key& k) : hash(h), key(k) {} + + size_t operator()(size_t seed) const { + return HashElement{hash, seed}(key); + } const Hash& hash; const Key& key; }; @@ -513,23 +528,24 @@ struct EqualElement { }; // Type erased function for computing hash of the slot. -using HashSlotFn = size_t (*)(const void* hash_fn, void* slot); +using HashSlotFn = size_t (*)(const void* hash_fn, void* slot, size_t seed); // Type erased function to apply `Fn` to data inside of the `slot`. // The data is expected to have type `T`. -template -size_t TypeErasedApplyToSlotFn(const void* fn, void* slot) { +template +size_t TypeErasedApplyToSlotFn(const void* fn, void* slot, size_t seed) { const auto* f = static_cast(fn); - return HashElement{*f}(*static_cast(slot)); + return HashElement{*f, seed}(*static_cast(slot)); } // Type erased function to apply `Fn` to data inside of the `*slot_ptr`. 
// The data is expected to have type `T`. -template -size_t TypeErasedDerefAndApplyToSlotFn(const void* fn, void* slot_ptr) { +template +size_t TypeErasedDerefAndApplyToSlotFn(const void* fn, void* slot_ptr, + size_t seed) { const auto* f = static_cast(fn); const T* slot = *static_cast(slot_ptr); - return HashElement{*f}(*slot); + return HashElement{*f, seed}(*slot); } } // namespace container_internal diff --git a/absl/container/internal/container_memory_test.cc b/absl/container/internal/container_memory_test.cc index 7e4357d5ed5..97b09f758e7 100644 --- a/absl/container/internal/container_memory_test.cc +++ b/absl/container/internal/container_memory_test.cc @@ -300,16 +300,46 @@ TEST(MapSlotPolicy, DestroyReturnsTrue) { TEST(ApplyTest, TypeErasedApplyToSlotFn) { size_t x = 7; + size_t seed = 100; auto fn = [](size_t v) { return v * 2; }; - EXPECT_EQ((TypeErasedApplyToSlotFn(&fn, &x)), 14); + EXPECT_EQ( + (TypeErasedApplyToSlotFn( + &fn, &x, seed)), + (HashElement(fn, seed)(x))); } TEST(ApplyTest, TypeErasedDerefAndApplyToSlotFn) { size_t x = 7; + size_t seed = 100; auto fn = [](size_t v) { return v * 2; }; size_t* x_ptr = &x; + EXPECT_EQ((TypeErasedDerefAndApplyToSlotFn(&fn, &x_ptr, + seed)), + (HashElement(fn, seed)(x))); +} + +TEST(HashElement, DefaultHash) { + size_t x = 7; + size_t seed = 100; + struct HashWithSeed { + size_t operator()(size_t v) const { return v * 2; } + size_t hash_with_seed(size_t v, size_t seed) const { + return v * 2 + seed * 3; + } + } hash; + EXPECT_EQ((HashElement(hash, seed)(x)), + hash.hash_with_seed(x, seed)); +} + +TEST(HashElement, NonDefaultHash) { + size_t x = 7; + size_t seed = 100; + auto fn = [](size_t v) { return v * 2; }; EXPECT_EQ( - (TypeErasedDerefAndApplyToSlotFn(&fn, &x_ptr)), 14); + (HashElement( + fn, seed)(x)), + fn(x) ^ seed); } } // namespace diff --git a/absl/container/internal/hash_function_defaults.h b/absl/container/internal/hash_function_defaults.h index c2a757b53f6..eefecabceb7 100644 --- 
a/absl/container/internal/hash_function_defaults.h +++ b/absl/container/internal/hash_function_defaults.h @@ -79,6 +79,18 @@ struct StringHash { size_t operator()(const absl::Cord& v) const { return absl::Hash{}(v); } + + private: + friend struct absl::hash_internal::HashWithSeed; + + size_t hash_with_seed(absl::string_view v, size_t seed) const { + return absl::hash_internal::HashWithSeed().hash( + absl::Hash{}, v, seed); + } + size_t hash_with_seed(const absl::Cord& v, size_t seed) const { + return absl::hash_internal::HashWithSeed().hash(absl::Hash{}, v, + seed); + } }; struct StringEq { diff --git a/absl/container/internal/hash_policy_traits.h b/absl/container/internal/hash_policy_traits.h index 1d7c910af81..82eed2a9777 100644 --- a/absl/container/internal/hash_policy_traits.h +++ b/absl/container/internal/hash_policy_traits.h @@ -146,7 +146,7 @@ struct hash_policy_traits : common_policy_traits { return P::value(elem); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { // get_hash_slot_fn may return nullptr to signal that non type erased function // should be used. GCC warns against comparing function address with nullptr. @@ -155,9 +155,9 @@ struct hash_policy_traits : common_policy_traits { // silent error: the address of * will never be NULL [-Werror=address] #pragma GCC diagnostic ignored "-Waddress" #endif - return Policy::template get_hash_slot_fn() == nullptr - ? &hash_slot_fn_non_type_erased - : Policy::template get_hash_slot_fn(); + return Policy::template get_hash_slot_fn() == nullptr + ? 
&hash_slot_fn_non_type_erased + : Policy::template get_hash_slot_fn(); #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop #endif @@ -167,10 +167,12 @@ struct hash_policy_traits : common_policy_traits { static constexpr bool soo_enabled() { return soo_enabled_impl(Rank1{}); } private: - template - static size_t hash_slot_fn_non_type_erased(const void* hash_fn, void* slot) { - return Policy::apply(HashElement{*static_cast(hash_fn)}, - Policy::element(static_cast(slot))); + template + static size_t hash_slot_fn_non_type_erased(const void* hash_fn, void* slot, + size_t seed) { + return Policy::apply( + HashElement{*static_cast(hash_fn), seed}, + Policy::element(static_cast(slot))); } // Use go/ranked-overloads for dispatching. Rank1 is preferred. diff --git a/absl/container/internal/hash_policy_traits_test.cc b/absl/container/internal/hash_policy_traits_test.cc index 2d2c7c2c38a..03de1322a1f 100644 --- a/absl/container/internal/hash_policy_traits_test.cc +++ b/absl/container/internal/hash_policy_traits_test.cc @@ -45,7 +45,7 @@ struct PolicyWithoutOptionalOps { static std::function apply_impl; static std::function value; - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } @@ -99,7 +99,7 @@ struct PolicyNoHashFn { return fn(v); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } @@ -108,9 +108,9 @@ struct PolicyNoHashFn { size_t* PolicyNoHashFn::apply_called_count; struct PolicyCustomHashFn : PolicyNoHashFn { - template + template static constexpr HashSlotFn get_hash_slot_fn() { - return &TypeErasedApplyToSlotFn; + return &TypeErasedApplyToSlotFn; } }; @@ -120,9 +120,11 @@ TEST(HashTest, PolicyNoHashFn_get_hash_slot_fn) { Hash hasher; Slot value = 7; - auto* fn = hash_policy_traits::get_hash_slot_fn(); + auto* fn = hash_policy_traits::get_hash_slot_fn< + Hash, /*kIsDefault=*/false>(); EXPECT_NE(fn, nullptr); - EXPECT_EQ(fn(&hasher, &value), hasher(value)); + 
EXPECT_EQ(fn(&hasher, &value, 100), + (HashElement(hasher, 100)(value))); EXPECT_EQ(apply_called_count, 1); } @@ -132,9 +134,12 @@ TEST(HashTest, PolicyCustomHashFn_get_hash_slot_fn) { Hash hasher; Slot value = 7; - auto* fn = hash_policy_traits::get_hash_slot_fn(); - EXPECT_EQ(fn, PolicyCustomHashFn::get_hash_slot_fn()); - EXPECT_EQ(fn(&hasher, &value), hasher(value)); + auto* fn = hash_policy_traits::get_hash_slot_fn< + Hash, /*kIsDefault=*/false>(); + EXPECT_EQ( + fn, (PolicyCustomHashFn::get_hash_slot_fn())); + EXPECT_EQ(fn(&hasher, &value, 100), + (HashElement(hasher, 100)(value))); EXPECT_EQ(apply_called_count, 0); } diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index daa9ff6a952..640c5a5be46 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -88,13 +88,13 @@ inline size_t RandomSeed() { return value ^ static_cast(reinterpret_cast(&counter)); } -bool ShouldRehashForBugDetection(PerTableSeed seed, size_t capacity) { +bool ShouldRehashForBugDetection(size_t capacity) { // Note: we can't use the abseil-random library because abseil-random // depends on swisstable. We want to return true with probability // `min(1, RehashProbabilityConstant() / capacity())`. In order to do this, // we probe based on a random hash and see if the offset is less than // RehashProbabilityConstant(). 
- return probe(seed, capacity, absl::HashOf(RandomSeed())).offset() < + return probe(capacity, absl::HashOf(RandomSeed())).offset() < RehashProbabilityConstant(); } @@ -140,23 +140,22 @@ GenerationType* EmptyGeneration() { } bool CommonFieldsGenerationInfoEnabled:: - should_rehash_for_bug_detection_on_insert(PerTableSeed seed, - size_t capacity) const { + should_rehash_for_bug_detection_on_insert(size_t capacity) const { if (reserved_growth_ == kReservedGrowthJustRanOut) return true; if (reserved_growth_ > 0) return false; - return ShouldRehashForBugDetection(seed, capacity); + return ShouldRehashForBugDetection(capacity); } bool CommonFieldsGenerationInfoEnabled::should_rehash_for_bug_detection_on_move( - PerTableSeed seed, size_t capacity) const { - return ShouldRehashForBugDetection(seed, capacity); + size_t capacity) const { + return ShouldRehashForBugDetection(capacity); } namespace { FindInfo find_first_non_full_from_h1(const ctrl_t* ctrl, size_t h1, size_t capacity) { - auto seq = probe(h1, capacity); + auto seq = probe_h1(capacity, h1); if (IsEmptyOrDeleted(ctrl[seq.offset()])) { return {seq.offset(), /*probe_length=*/0}; } @@ -248,7 +247,7 @@ void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) { } FindInfo find_first_non_full(const CommonFields& common, size_t hash) { - return find_first_non_full_from_h1(common.control(), H1(hash, common.seed()), + return find_first_non_full_from_h1(common.control(), H1(hash), common.capacity()); } @@ -344,7 +343,7 @@ size_t DropDeletesWithoutResizeAndPrepareInsert( continue; } if (!IsDeleted(ctrl[i])) continue; - const size_t hash = (*hasher)(hash_fn, slot_ptr); + const size_t hash = (*hasher)(hash_fn, slot_ptr, common.seed().seed()); const FindInfo target = find_first_non_full(common, hash); const size_t new_i = target.offset; total_probe_length += target.probe_length; @@ -576,9 +575,10 @@ size_t FindNewPositionsAndTransferSlots( void* new_slots = common.slot_array(); const void* hash_fn = 
policy.hash_fn(common); const size_t slot_size = policy.slot_size; + const size_t seed = common.seed().seed(); const auto insert_slot = [&](void* slot) { - size_t hash = policy.hash_slot(hash_fn, slot); + size_t hash = policy.hash_slot(hash_fn, slot, seed); FindInfo target; if (common.is_small()) { target = FindInfo{0, 0}; @@ -687,8 +687,9 @@ void ResizeNonSooImpl(CommonFields& common, common.set_capacity(new_capacity); const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, new_capacity, has_infoz, alloc); - common.set_control(new_ctrl); + common.set_control(new_ctrl); common.set_slots(new_slots); + common.generate_new_seed(has_infoz); size_t total_probe_length = 0; ResetCtrl(common, slot_size); @@ -738,23 +739,26 @@ void ResizeEmptyNonAllocatedTableImpl(CommonFields& common, // It is rare to resize an SOO table with one element to a large size. // Requires: `c` contains SOO data. void InsertOldSooSlotAndInitializeControlBytes( - CommonFields& c, const PolicyFunctions& __restrict policy, size_t hash, - ctrl_t* new_ctrl, void* new_slots) { + CommonFields& c, const PolicyFunctions& __restrict policy, ctrl_t* new_ctrl, + void* new_slots, bool has_infoz) { ABSL_SWISSTABLE_ASSERT(c.size() == policy.soo_capacity()); ABSL_SWISSTABLE_ASSERT(policy.soo_enabled); size_t new_capacity = c.capacity(); - c.generate_new_seed(); - size_t offset = probe(c.seed(), new_capacity, hash).offset(); + c.generate_new_seed(has_infoz); + + const size_t soo_slot_hash = + policy.hash_slot(policy.hash_fn(c), c.soo_data(), c.seed().seed()); + size_t offset = probe(new_capacity, soo_slot_hash).offset(); offset = offset == new_capacity ? 
0 : offset; SanitizerPoisonMemoryRegion(new_slots, policy.slot_size * new_capacity); void* target_slot = SlotAddress(new_slots, offset, policy.slot_size); SanitizerUnpoisonMemoryRegion(target_slot, policy.slot_size); policy.transfer_n(&c, target_slot, c.soo_data(), 1); - c.set_control(new_ctrl); + c.set_control(new_ctrl); c.set_slots(new_slots); ResetCtrl(c, policy.slot_size); - SetCtrl(c, offset, H2(hash), policy.slot_size); + SetCtrl(c, offset, H2(soo_slot_hash), policy.slot_size); } enum class ResizeFullSooTableSamplingMode { @@ -783,6 +787,7 @@ void ResizeFullSooTable(CommonFields& common, void* alloc = policy.get_char_alloc(common); HashtablezInfoHandle infoz; + bool has_infoz = false; if (sampling_mode == ResizeFullSooTableSamplingMode::kForceSampleNoResizeIfUnsampled) { if (ABSL_PREDICT_FALSE(policy.is_hashtablez_eligible)) { @@ -790,13 +795,10 @@ void ResizeFullSooTable(CommonFields& common, policy.soo_capacity()); } - if (!infoz.IsSampled()) { - return; - } + if (!infoz.IsSampled()) return; + has_infoz = true; } - const bool has_infoz = infoz.IsSampled(); - common.set_capacity(new_capacity); // We do not set control and slots in CommonFields yet to avoid overriding @@ -804,11 +806,8 @@ void ResizeFullSooTable(CommonFields& common, const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, new_capacity, has_infoz, alloc); - const size_t soo_slot_hash = - policy.hash_slot(policy.hash_fn(common), common.soo_data()); - - InsertOldSooSlotAndInitializeControlBytes(common, policy, soo_slot_hash, - new_ctrl, new_slots); + InsertOldSooSlotAndInitializeControlBytes(common, policy, new_ctrl, new_slots, + has_infoz); ResetGrowthLeft(common); if (has_infoz) { common.set_has_infoz(); @@ -998,12 +997,13 @@ ABSL_ATTRIBUTE_NOINLINE size_t ProcessProbedMarkedElements( const void* hash_fn = policy.hash_fn(c); auto hash_slot = policy.hash_slot; auto transfer_n = policy.transfer_n; + const size_t seed = c.seed().seed(); for (size_t old_index = start; old_index < 
old_capacity; ++old_index) { if (old_ctrl[old_index] != ctrl_t::kSentinel) { continue; } void* src_slot = SlotAddress(old_slots, old_index, slot_size); - const size_t hash = hash_slot(hash_fn, src_slot); + const size_t hash = hash_slot(hash_fn, src_slot, seed); const FindInfo target = find_first_non_full(c, hash); total_probe_length += target.probe_length; const size_t new_i = target.offset; @@ -1276,7 +1276,7 @@ void IncrementSmallSize(CommonFields& common, std::pair Grow1To3AndPrepareInsert( CommonFields& common, const PolicyFunctions& __restrict policy, - absl::FunctionRef get_hash) { + absl::FunctionRef get_hash) { // TODO(b/413062340): Refactor to reuse more code with // GrowSooTableToNextCapacityAndPrepareInsert. ABSL_SWISSTABLE_ASSERT(common.capacity() == 1); @@ -1296,13 +1296,18 @@ std::pair Grow1To3AndPrepareInsert( const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, kNewCapacity, has_infoz, alloc); - common.set_control(new_ctrl); + common.set_control(new_ctrl); common.set_slots(new_slots); SanitizerPoisonMemoryRegion(new_slots, kNewCapacity * slot_size); - const size_t new_hash = get_hash(); + if (ABSL_PREDICT_TRUE(!has_infoz)) { + // When we're sampled, we already have a seed. 
+ common.generate_new_seed(/*has_infoz=*/false); + } + const size_t new_hash = get_hash(common.seed().seed()); h2_t new_h2 = H2(new_hash); - size_t orig_hash = policy.hash_slot(policy.hash_fn(common), old_slots); + size_t orig_hash = + policy.hash_slot(policy.hash_fn(common), old_slots, common.seed().seed()); size_t offset = Resize1To3NewOffset(new_hash, common.seed()); InitializeThreeElementsControlBytes(H2(orig_hash), new_h2, offset, new_ctrl); @@ -1348,7 +1353,7 @@ size_t GrowToNextCapacityAndPrepareInsert( const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, new_capacity, has_infoz, alloc); - common.set_control(new_ctrl); + common.set_control(new_ctrl); common.set_slots(new_slots); SanitizerPoisonMemoryRegion(new_slots, new_capacity * slot_size); @@ -1397,7 +1402,7 @@ size_t GrowToNextCapacityAndPrepareInsert( std::pair PrepareInsertSmallNonSoo( CommonFields& common, const PolicyFunctions& __restrict policy, - absl::FunctionRef get_hash) { + absl::FunctionRef get_hash) { ABSL_SWISSTABLE_ASSERT(common.is_small()); ABSL_SWISSTABLE_ASSERT(!policy.soo_enabled); if (common.capacity() == 1) { @@ -1426,15 +1431,16 @@ std::pair PrepareInsertSmallNonSoo( const auto [new_ctrl, new_slots] = AllocBackingArray(common, policy, kNewCapacity, has_infoz, alloc); - // In small tables seed is not needed. 
- common.set_control(new_ctrl); + common.set_control(new_ctrl); common.set_slots(new_slots); static_assert(NextCapacity(0) == 1); PrepareInsertCommon(common); if (ABSL_PREDICT_FALSE(has_infoz)) { - ReportSingleGroupTableGrowthToInfoz(common, infoz, get_hash()); + common.generate_new_seed(/*has_infoz=*/true); + ReportSingleGroupTableGrowthToInfoz(common, infoz, + get_hash(common.seed().seed())); } return {SooControl(), new_slots}; } @@ -1535,11 +1541,12 @@ size_t PrepareInsertLargeSlow(CommonFields& common, ABSL_ATTRIBUTE_NOINLINE size_t GrowEmptySooTableToNextCapacityForceSamplingAndPrepareInsert( CommonFields& common, const PolicyFunctions& __restrict policy, - size_t new_hash) { + absl::FunctionRef get_hash) { ResizeEmptyNonAllocatedTableImpl(common, policy, NextCapacity(SooCapacity()), /*force_infoz=*/true); PrepareInsertCommon(common); common.growth_info().OverwriteEmptyAsFull(); + const size_t new_hash = get_hash(common.seed().seed()); SetCtrlInSingleGroupTable(common, SooSlotIndex(), H2(new_hash), policy.slot_size); common.infoz().RecordInsert(new_hash, /*distance_from_desired=*/0); @@ -1630,12 +1637,12 @@ void ReserveEmptyNonAllocatedTableToFitBucketCount( template size_t GrowSooTableToNextCapacityAndPrepareInsert( CommonFields& common, const PolicyFunctions& __restrict policy, - size_t new_hash, ctrl_t soo_slot_ctrl) { + absl::FunctionRef get_hash, bool force_sampling) { AssertSoo(common, policy); - if (ABSL_PREDICT_FALSE(soo_slot_ctrl == ctrl_t::kEmpty)) { + if (ABSL_PREDICT_FALSE(force_sampling)) { // The table is empty, it is only used for forced sampling of SOO tables. 
return GrowEmptySooTableToNextCapacityForceSamplingAndPrepareInsert( - common, policy, new_hash); + common, policy, get_hash); } ABSL_SWISSTABLE_ASSERT(common.size() == policy.soo_capacity()); static constexpr size_t kNewCapacity = NextCapacity(SooCapacity()); @@ -1654,11 +1661,14 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert( PrepareInsertCommon(common); ABSL_SWISSTABLE_ASSERT(common.size() == 2); GetGrowthInfoFromControl(new_ctrl).InitGrowthLeftNoDeleted(kNewCapacity - 2); - common.generate_new_seed(); + common.generate_new_seed(/*has_infoz=*/false); + const h2_t soo_slot_h2 = H2(policy.hash_slot( + policy.hash_fn(common), common.soo_data(), common.seed().seed())); + const size_t new_hash = get_hash(common.seed().seed()); const size_t offset = Resize1To3NewOffset(new_hash, common.seed()); - InitializeThreeElementsControlBytes(static_cast(soo_slot_ctrl), - H2(new_hash), offset, new_ctrl); + InitializeThreeElementsControlBytes(soo_slot_h2, H2(new_hash), offset, + new_ctrl); SanitizerPoisonMemoryRegion(new_slots, slot_size * kNewCapacity); void* target_slot = SlotAddress(new_slots, SooSlotIndex(), slot_size); @@ -1680,8 +1690,7 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert( static_assert(SooSlotMemcpySize == 0); policy.transfer_n(&common, target_slot, common.soo_data(), 1); } - // Seed was already generated above. - common.set_control(new_ctrl); + common.set_control(new_ctrl); common.set_slots(new_slots); // Full SOO table couldn't be sampled. If SOO table is sampled, it would @@ -1793,47 +1802,22 @@ void Copy(CommonFields& common, const PolicyFunctions& __restrict policy, ABSL_SWISSTABLE_ASSERT(other.capacity() > soo_capacity); const size_t cap = common.capacity(); ABSL_SWISSTABLE_ASSERT(cap > soo_capacity); - // Note about single group tables: - // 1. It is correct to have any order of elements. - // 2. Order has to be non deterministic. - // 3. We are assigning elements with arbitrary `shift` starting from - // `capacity + shift` position. - // 4. 
`shift` must be coprime with `capacity + 1` in order to be able to use - // modular arithmetic to traverse all positions, instead of cycling - // through a subset of positions. Odd numbers are coprime with any - // `capacity + 1` (2^N). size_t offset = cap; - const size_t shift = is_single_group(cap) ? (common.seed().seed() | 1) : 0; const void* hash_fn = policy.hash_fn(common); auto hasher = policy.hash_slot; + const size_t seed = common.seed().seed(); IterateOverFullSlotsImpl( - other, slot_size, [&](const ctrl_t* that_ctrl, void* that_slot) { - if (shift == 0) { - // Big tables case. Position must be searched via probing. - // The table is guaranteed to be empty, so we can do faster than - // a full `insert`. - const size_t hash = (*hasher)(hash_fn, that_slot); - FindInfo target = find_first_non_full(common, hash); - infoz.RecordInsert(hash, target.probe_length); - offset = target.offset; - } else { - // Small tables case. Next position is computed via shift. - offset = (offset + shift) & cap; - } - const h2_t h2 = static_cast(*that_ctrl); - // We rely on the hash not changing for small tables. - ABSL_SWISSTABLE_ASSERT( - H2((*hasher)(hash_fn, that_slot)) == h2 && - "hash function value changed unexpectedly during the copy"); - SetCtrl(common, offset, h2, slot_size); + other, slot_size, [&](const ctrl_t*, void* that_slot) { + // The table is guaranteed to be empty, so we can do faster than + // a full `insert`. + const size_t hash = (*hasher)(hash_fn, that_slot, seed); + FindInfo target = find_first_non_full(common, hash); + infoz.RecordInsert(hash, target.probe_length); + offset = target.offset; + SetCtrl(common, offset, H2(hash), slot_size); copy_fn(SlotAddress(common.slot_array(), offset, slot_size), that_slot); common.maybe_increment_generation_on_insert(); }); - if (shift != 0) { - // On small table copy we do not record individual inserts. - // RecordInsert requires hash, but it is unknown for small tables. 
- infoz.RecordStorageChanged(size, cap); - } common.increment_size(size); common.growth_info().OverwriteManyEmptyAsFull(size); } @@ -1859,18 +1843,11 @@ void ReserveTableToFitNewSize(CommonFields& common, ReserveAllocatedTable(common, policy, new_size); } -size_t PrepareInsertLarge(CommonFields& common, - const PolicyFunctions& __restrict policy, size_t hash, - FindInfo target) { +namespace { +size_t PrepareInsertLargeImpl(CommonFields& common, + const PolicyFunctions& __restrict policy, + size_t hash, FindInfo target) { ABSL_SWISSTABLE_ASSERT(!common.is_small()); - if (common.should_rehash_for_bug_detection_on_insert()) { - // Move to a different heap allocation in order to detect bugs. - const size_t cap = common.capacity(); - ResizeAllocatedTableWithSeedChange( - common, policy, common.growth_left() > 0 ? cap : NextCapacity(cap)); - target = find_first_non_full(common, hash); - } - const GrowthInfo growth_info = common.growth_info(); // When there are no deleted slots in the table // and growth_left is positive, we can insert at the first @@ -1884,6 +1861,31 @@ size_t PrepareInsertLarge(CommonFields& common, common.infoz().RecordInsert(hash, target.probe_length); return target.offset; } +} // namespace + +size_t PrepareInsertLarge(CommonFields& common, + const PolicyFunctions& __restrict policy, size_t hash, + FindInfo target) { + // NOLINTNEXTLINE(misc-static-assert) + ABSL_SWISSTABLE_ASSERT(!SwisstableGenerationsEnabled()); + return PrepareInsertLargeImpl(common, policy, hash, target); +} + +size_t PrepareInsertLargeGenerationsEnabled( + CommonFields& common, const PolicyFunctions& policy, size_t hash, + FindInfo target, absl::FunctionRef recompute_hash) { + // NOLINTNEXTLINE(misc-static-assert) + ABSL_SWISSTABLE_ASSERT(SwisstableGenerationsEnabled()); + if (common.should_rehash_for_bug_detection_on_insert()) { + // Move to a different heap allocation in order to detect bugs. 
+ const size_t cap = common.capacity(); + ResizeAllocatedTableWithSeedChange( + common, policy, common.growth_left() > 0 ? cap : NextCapacity(cap)); + hash = recompute_hash(common.seed().seed()); + target = find_first_non_full(common, hash); + } + return PrepareInsertLargeImpl(common, policy, hash, target); +} namespace { // Returns true if the following is true @@ -1919,32 +1921,33 @@ template size_t TryFindNewIndexWithoutProbing(size_t h1, size_t old_index, // We need to instantiate ALL possible template combinations because we define // the function in the cc file. template size_t GrowSooTableToNextCapacityAndPrepareInsert<0, false>( - CommonFields&, const PolicyFunctions&, size_t, ctrl_t); + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); template size_t GrowSooTableToNextCapacityAndPrepareInsert< - OptimalMemcpySizeForSooSlotTransfer(1), true>(CommonFields&, - const PolicyFunctions&, - size_t, ctrl_t); + OptimalMemcpySizeForSooSlotTransfer(1), true>( + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); static_assert(VerifyOptimalMemcpySizeForSooSlotTransferRange(2, 3)); template size_t GrowSooTableToNextCapacityAndPrepareInsert< - OptimalMemcpySizeForSooSlotTransfer(3), true>(CommonFields&, - const PolicyFunctions&, - size_t, ctrl_t); + OptimalMemcpySizeForSooSlotTransfer(3), true>( + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); static_assert(VerifyOptimalMemcpySizeForSooSlotTransferRange(4, 8)); template size_t GrowSooTableToNextCapacityAndPrepareInsert< - OptimalMemcpySizeForSooSlotTransfer(8), true>(CommonFields&, - const PolicyFunctions&, - size_t, ctrl_t); + OptimalMemcpySizeForSooSlotTransfer(8), true>( + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); #if UINTPTR_MAX == UINT32_MAX static_assert(MaxSooSlotSize() == 8); #else static_assert(VerifyOptimalMemcpySizeForSooSlotTransferRange(9, 16)); template size_t GrowSooTableToNextCapacityAndPrepareInsert< - 
OptimalMemcpySizeForSooSlotTransfer(16), true>(CommonFields&, - const PolicyFunctions&, - size_t, ctrl_t); + OptimalMemcpySizeForSooSlotTransfer(16), true>( + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); static_assert(MaxSooSlotSize() == 16); #endif diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 7106bc81953..08c5d57aba0 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -194,6 +194,7 @@ #include #include "absl/base/attributes.h" +#include "absl/base/casts.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/iterator_traits.h" @@ -446,18 +447,29 @@ class PerTableSeed { // The number of bits in the seed. // It is big enough to ensure non-determinism of iteration order. // We store the seed inside a uint64_t together with size and other metadata. - // Using 16 bits allows us to save one `and` instruction in H1 (we use movzwl - // instead of movq+and). + // Using 16 bits allows us to save one `and` instruction in H1 (we use + // sign-extended move instead of mov+and). static constexpr size_t kBitCount = 16; + static constexpr size_t kSignBit = uint64_t{1} << (kBitCount - 1); - // Returns the seed for the table. Only the lowest kBitCount are non zero. - size_t seed() const { return seed_; } + // Returns the seed for the table. + size_t seed() const { + // We use a sign-extended load to ensure high bits are non-zero. + int16_t seed_signed = absl::bit_cast(seed_); + auto seed_sign_extended = + static_cast>(seed_signed); + return absl::bit_cast(seed_sign_extended); + } private: friend class HashtableSize; - explicit PerTableSeed(size_t seed) : seed_(seed) {} + explicit PerTableSeed(uint16_t seed) : seed_(seed) { + ABSL_SWISSTABLE_ASSERT((seed & kSignBit) != 0 || seed == 0); + } - const size_t seed_; + // The most significant bit of the seed is always 1 when there is a non-zero + // seed. 
This way, when sign-extended the seed has non-zero high bits. + const uint16_t seed_; }; // Returns next per-table seed. @@ -496,8 +508,14 @@ class HashtableSize { return PerTableSeed(static_cast(data_) & kSeedMask); } - void generate_new_seed() { - data_ = (data_ & ~kSeedMask) ^ uint64_t{NextSeed()}; + void generate_new_seed() { set_seed(NextSeed()); } + + // We need to use a constant seed when the table is sampled so that sampled + // hashes use the same seed and can e.g. identify stuck bits accurately. + void set_sampled_seed() { set_seed(PerTableSeed::kSignBit); } + + bool is_sampled_seed() const { + return (data_ & kSeedMask) == PerTableSeed::kSignBit; } // Returns true if the table has infoz. @@ -511,6 +529,9 @@ class HashtableSize { void set_no_seed_for_testing() { data_ &= ~kSeedMask; } private: + void set_seed(uint16_t seed) { + data_ = (data_ & ~kSeedMask) | (seed | PerTableSeed::kSignBit); + } static constexpr size_t kSizeShift = 64 - kSizeBitCount; static constexpr uint64_t kSizeOneNoMetadata = uint64_t{1} << kSizeShift; static constexpr uint64_t kMetadataMask = kSizeOneNoMetadata - 1; @@ -521,11 +542,8 @@ class HashtableSize { uint64_t data_; }; -// Mixes the hash with a per-table seed. Note that we only use the low bits of -// H1 because we bitwise-and with capacity later. -inline size_t H1(size_t hash, PerTableSeed seed) { - return hash ^ seed.seed(); -} +// H1 is just the low bits of the hash. +inline size_t H1(size_t hash) { return hash; } // Extracts the H2 portion of a hash: the 7 most significant bits. // @@ -566,11 +584,9 @@ class CommonFieldsGenerationInfoEnabled { // references. We rehash on the first insertion after reserved_growth_ reaches // 0 after a call to reserve. We also do a rehash with low probability // whenever reserved_growth_ is zero. 
- bool should_rehash_for_bug_detection_on_insert(PerTableSeed seed, - size_t capacity) const; + bool should_rehash_for_bug_detection_on_insert(size_t capacity) const; // Similar to above, except that we don't depend on reserved_growth_. - bool should_rehash_for_bug_detection_on_move(PerTableSeed seed, - size_t capacity) const; + bool should_rehash_for_bug_detection_on_move(size_t capacity) const; void maybe_increment_generation_on_insert() { if (reserved_growth_ == kReservedGrowthJustRanOut) reserved_growth_ = 0; @@ -623,12 +639,8 @@ class CommonFieldsGenerationInfoDisabled { CommonFieldsGenerationInfoDisabled& operator=( CommonFieldsGenerationInfoDisabled&&) = default; - bool should_rehash_for_bug_detection_on_insert(PerTableSeed, size_t) const { - return false; - } - bool should_rehash_for_bug_detection_on_move(PerTableSeed, size_t) const { - return false; - } + bool should_rehash_for_bug_detection_on_insert(size_t) const { return false; } + bool should_rehash_for_bug_detection_on_move(size_t) const { return false; } void maybe_increment_generation_on_insert() {} void increment_generation() {} void reset_reserved_growth(size_t, size_t) {} @@ -955,19 +967,7 @@ class CommonFields : public CommonFieldsGenerationInfo { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap_or_soo_.control().get()); } - // When we set the control bytes, we also often want to generate a new seed. - // So we bundle these two operations together to make sure we don't forget to - // generate a new seed. - // The table will be invalidated if - // `kGenerateSeed && !empty() && !is_single_group(capacity())` because H1 is - // being changed. In such cases, we will need to rehash the table. - template - void set_control(ctrl_t* c) { - heap_or_soo_.control().set(c); - if constexpr (kGenerateSeed) { - generate_new_seed(); - } - } + void set_control(ctrl_t* c) { heap_or_soo_.control().set(c); } // Note: we can't use slots() because Qt defines "slots" as a macro. 
void* slot_array() const { return heap_or_soo_.slot_array().get(); } @@ -1002,13 +1002,20 @@ class CommonFields : public CommonFieldsGenerationInfo { } bool empty() const { return size_.empty(); } - // The seed used for the H1 part of the hash function. + // The seed used for the hash function. PerTableSeed seed() const { return size_.seed(); } - // Generates a new seed for the H1 part of the hash function. - // The table will be invalidated if - // `kGenerateSeed && !empty() && !is_single_group(capacity())` because H1 is - // being changed. In such cases, we will need to rehash the table. - void generate_new_seed() { size_.generate_new_seed(); } + // Generates a new seed the hash function. + // The table will be invalidated if `!empty()` because hash is being changed. + // In such cases, we will need to rehash the table. + void generate_new_seed(bool has_infoz) { + // Note: we can't use has_infoz() here because we set has_infoz later than + // we generate the seed. + if (ABSL_PREDICT_FALSE(has_infoz)) { + size_.set_sampled_seed(); + return; + } + size_.generate_new_seed(); + } void set_no_seed_for_testing() { size_.set_no_seed_for_testing(); } // The total number of available slots. @@ -1036,7 +1043,10 @@ class CommonFields : public CommonFieldsGenerationInfo { } bool has_infoz() const { return size_.has_infoz(); } - void set_has_infoz() { size_.set_has_infoz(); } + void set_has_infoz() { + ABSL_SWISSTABLE_ASSERT(size_.is_sampled_seed()); + size_.set_has_infoz(); + } HashtablezInfoHandle* infoz_ptr() const { // growth_info is stored before control bytes. @@ -1063,11 +1073,11 @@ class CommonFields : public CommonFieldsGenerationInfo { // will end up rehashing anyways. 
if (growth_left() == 0) return false; return CommonFieldsGenerationInfo:: - should_rehash_for_bug_detection_on_insert(seed(), capacity()); + should_rehash_for_bug_detection_on_insert(capacity()); } bool should_rehash_for_bug_detection_on_move() const { return CommonFieldsGenerationInfo::should_rehash_for_bug_detection_on_move( - seed(), capacity()); + capacity()); } void reset_reserved_growth(size_t reservation) { CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size()); @@ -1402,15 +1412,14 @@ constexpr bool is_single_group(size_t capacity) { } // Begins a probing operation on `common.control`, using `hash`. -inline probe_seq probe(size_t h1, size_t capacity) { +inline probe_seq probe_h1(size_t capacity, size_t h1) { return probe_seq(h1, capacity); } -inline probe_seq probe(PerTableSeed seed, size_t capacity, - size_t hash) { - return probe(H1(hash, seed), capacity); +inline probe_seq probe(size_t capacity, size_t hash) { + return probe_h1(capacity, H1(hash)); } inline probe_seq probe(const CommonFields& common, size_t hash) { - return probe(common.seed(), common.capacity(), hash); + return probe(common.capacity(), hash); } // Probes an array of control bits using a probe sequence derived from `hash`, @@ -1611,7 +1620,7 @@ struct PolicyFunctions { void* (*hash_fn)(CommonFields& common); // Returns the hash of the pointed-to slot. - size_t (*hash_slot)(const void* hash_fn, void* slot); + HashSlotFn hash_slot; // Transfers the contents of `count` slots from src_slot to dst_slot. // We use ability to transfer several slots in single group table growth. @@ -1766,17 +1775,12 @@ constexpr size_t OptimalMemcpySizeForSooSlotTransfer( // Resizes SOO table to the NextCapacity(SooCapacity()) and prepares insert for // the given new_hash. Returns the offset of the new element. -// `soo_slot_ctrl` is the control byte of the SOO slot. -// If soo_slot_ctrl is kEmpty -// 1. The table must be empty. -// 2. Table will be forced to be sampled. 
// All possible template combinations are defined in cc file to improve // compilation time. template -size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, - const PolicyFunctions& policy, - size_t new_hash, - ctrl_t soo_slot_ctrl); +size_t GrowSooTableToNextCapacityAndPrepareInsert( + CommonFields& common, const PolicyFunctions& policy, + absl::FunctionRef get_hash, bool force_sampling); // PrepareInsert for small tables (is_small()==true). // Returns the new control and the new slot. @@ -1784,7 +1788,7 @@ size_t GrowSooTableToNextCapacityAndPrepareInsert(CommonFields& common, // (is_small()==false). std::pair PrepareInsertSmallNonSoo( CommonFields& common, const PolicyFunctions& policy, - absl::FunctionRef get_hash); + absl::FunctionRef get_hash); // Resizes table with allocated slots and change the table seed. // Tables with SOO enabled must have capacity > policy.soo_capacity. @@ -1837,6 +1841,12 @@ void* GetRefForEmptyClass(CommonFields& common); size_t PrepareInsertLarge(CommonFields& common, const PolicyFunctions& policy, size_t hash, FindInfo target); +// Same as above, but with generations enabled, we may end up changing the seed, +// which means we need to be able to recompute the hash. +size_t PrepareInsertLargeGenerationsEnabled( + CommonFields& common, const PolicyFunctions& policy, size_t hash, + FindInfo target, absl::FunctionRef recompute_hash); + // A SwissTable. // // Policy: a policy defines how to perform different operations on @@ -1890,6 +1900,10 @@ class raw_hash_set { using slot_type = typename PolicyTraits::slot_type; + constexpr static bool kIsDefaultHash = + std::is_same_v> || + std::is_same_v; + // TODO(b/289225379): we could add extra SOO space inside raw_hash_set // after CommonFields to allow inlining larger slot_types (e.g. 
std::string), // but it's a bit complicated if we want to support incomplete mapped_type in @@ -3091,11 +3105,13 @@ class raw_hash_set { } template ABSL_ATTRIBUTE_ALWAYS_INLINE size_t hash_of(const K& key) const { - return HashElement{hash_ref()}(key); + return HashElement{hash_ref(), + common().seed().seed()}(key); } ABSL_ATTRIBUTE_ALWAYS_INLINE size_t hash_of(slot_type* slot) const { - return PolicyTraits::apply(HashElement{hash_ref()}, - PolicyTraits::element(slot)); + return PolicyTraits::apply( + HashElement{hash_ref(), common().seed().seed()}, + PolicyTraits::element(slot)); } // Casting directly from e.g. char* to slot_type* can cause compilation errors @@ -3211,25 +3227,26 @@ class raw_hash_set { template std::pair find_or_prepare_insert_soo(const K& key) { ABSL_SWISSTABLE_ASSERT(is_soo()); - ctrl_t soo_slot_ctrl; + bool force_sampling; if (empty()) { if (!should_sample_soo()) { common().set_full_soo(); return {single_iterator(), true}; } - soo_slot_ctrl = ctrl_t::kEmpty; + force_sampling = true; } else if (equal_to(key, single_slot())) { return {single_iterator(), false}; } else { - soo_slot_ctrl = static_cast(H2(hash_of(single_slot()))); + force_sampling = false; } ABSL_SWISSTABLE_ASSERT(capacity() == 1); - const size_t hash = hash_of(key); constexpr bool kUseMemcpy = PolicyTraits::transfer_uses_memcpy() && SooEnabled(); size_t index = GrowSooTableToNextCapacityAndPrepareInsert< kUseMemcpy ? 
OptimalMemcpySizeForSooSlotTransfer(sizeof(slot_type)) : 0, - kUseMemcpy>(common(), GetPolicyFunctions(), hash, soo_slot_ctrl); + kUseMemcpy>(common(), GetPolicyFunctions(), + HashKey{hash_ref(), key}, + force_sampling); return {iterator_at(index), true}; } @@ -3244,9 +3261,9 @@ class raw_hash_set { return {single_iterator(), false}; } } - return {iterator_at_ptr( - PrepareInsertSmallNonSoo(common(), GetPolicyFunctions(), - HashKey{hash_ref(), key})), + return {iterator_at_ptr(PrepareInsertSmallNonSoo( + common(), GetPolicyFunctions(), + HashKey{hash_ref(), key})), true}; } @@ -3270,10 +3287,15 @@ class raw_hash_set { auto mask_empty = g.MaskEmpty(); if (ABSL_PREDICT_TRUE(mask_empty)) { size_t target = seq.offset(mask_empty.LowestBitSet()); - return { - iterator_at(PrepareInsertLarge(common(), GetPolicyFunctions(), hash, - FindInfo{target, seq.index()})), - true}; + size_t index = + SwisstableGenerationsEnabled() + ? PrepareInsertLargeGenerationsEnabled( + common(), GetPolicyFunctions(), hash, + FindInfo{target, seq.index()}, + HashKey{hash_ref(), key}) + : PrepareInsertLarge(common(), GetPolicyFunctions(), hash, + FindInfo{target, seq.index()}); + return {iterator_at(index), true}; } seq.next(); ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); @@ -3526,8 +3548,6 @@ class raw_hash_set { ctrl_t* new_ctrl = common.control(); slot_type* new_slots = set->slot_array(); - const PerTableSeed seed = common.seed(); - for (size_t group_index = 0; group_index < old_capacity; group_index += Group::kWidth) { GroupFullEmptyOrDeleted old_g(old_ctrl + group_index); @@ -3543,7 +3563,7 @@ class raw_hash_set { // TODO(b/382423690): try to avoid entire hash calculation since we need // only one new bit of h1. 
size_t hash = set->hash_of(old_slot); - size_t h1 = H1(hash, seed); + size_t h1 = H1(hash); h2_t h2 = H2(hash); size_t new_index = TryFindNewIndexWithoutProbing( h1, old_index, old_capacity, new_ctrl, new_capacity); @@ -3586,7 +3606,7 @@ class raw_hash_set { // for standard layout and alignof(Hash) <= alignof(CommonFields). std::is_empty_v ? &GetRefForEmptyClass : &raw_hash_set::get_hash_ref_fn, - PolicyTraits::template get_hash_slot_fn(), + PolicyTraits::template get_hash_slot_fn(), PolicyTraits::transfer_uses_memcpy() ? TransferNRelocatable : &raw_hash_set::transfer_n_slots_fn, @@ -3689,6 +3709,8 @@ struct HashtableDebugAccess> { using Traits = typename Set::PolicyTraits; using Slot = typename Traits::slot_type; + constexpr static bool kIsDefaultHash = Set::kIsDefaultHash; + static size_t GetNumProbes(const Set& set, const typename Set::key_type& key) { if (set.is_soo()) return 0; @@ -3733,16 +3755,21 @@ struct HashtableDebugAccess> { // Extern template instantiations reduce binary size and linker input size. // Function definition is in raw_hash_set.cc. 
extern template size_t GrowSooTableToNextCapacityAndPrepareInsert<0, false>( - CommonFields&, const PolicyFunctions&, size_t, ctrl_t); + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); extern template size_t GrowSooTableToNextCapacityAndPrepareInsert<1, true>( - CommonFields&, const PolicyFunctions&, size_t, ctrl_t); + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); extern template size_t GrowSooTableToNextCapacityAndPrepareInsert<4, true>( - CommonFields&, const PolicyFunctions&, size_t, ctrl_t); + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); extern template size_t GrowSooTableToNextCapacityAndPrepareInsert<8, true>( - CommonFields&, const PolicyFunctions&, size_t, ctrl_t); + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); #if UINTPTR_MAX == UINT64_MAX extern template size_t GrowSooTableToNextCapacityAndPrepareInsert<16, true>( - CommonFields&, const PolicyFunctions&, size_t, ctrl_t); + CommonFields&, const PolicyFunctions&, absl::FunctionRef, + bool); #endif } // namespace container_internal diff --git a/absl/container/internal/raw_hash_set_allocator_test.cc b/absl/container/internal/raw_hash_set_allocator_test.cc index 7e7a5063d59..2e6f8f5f08f 100644 --- a/absl/container/internal/raw_hash_set_allocator_test.cc +++ b/absl/container/internal/raw_hash_set_allocator_test.cc @@ -180,7 +180,7 @@ struct Policy { static slot_type& element(slot_type* slot) { return *slot; } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } diff --git a/absl/container/internal/raw_hash_set_benchmark.cc b/absl/container/internal/raw_hash_set_benchmark.cc index ac948779412..07a5b90f5a0 100644 --- a/absl/container/internal/raw_hash_set_benchmark.cc +++ b/absl/container/internal/raw_hash_set_benchmark.cc @@ -64,7 +64,7 @@ struct IntPolicy { return std::forward(f)(x, x); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } @@ -127,7 +127,7 
@@ class StringPolicy { PairArgs(std::forward(args)...)); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } diff --git a/absl/container/internal/raw_hash_set_probe_benchmark.cc b/absl/container/internal/raw_hash_set_probe_benchmark.cc index e56648f3bf9..458038e0b34 100644 --- a/absl/container/internal/raw_hash_set_probe_benchmark.cc +++ b/absl/container/internal/raw_hash_set_probe_benchmark.cc @@ -71,7 +71,7 @@ struct Policy { return std::forward(f)(arg, arg); } - template + template static constexpr auto get_hash_slot_fn() { return nullptr; } diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index f835b94ded9..e1dafff336b 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -386,7 +386,7 @@ struct ValuePolicy { std::forward(f), std::forward(args)...); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } @@ -534,7 +534,7 @@ class StringPolicy { PairArgs(std::forward(args)...)); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } @@ -1131,7 +1131,7 @@ struct DecomposePolicy { return std::forward(f)(x, x); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return nullptr; } @@ -2814,12 +2814,12 @@ TYPED_TEST(RawHashSamplerTest, Sample) { // Expect that we sampled at the requested sampling rate of ~1%. 
EXPECT_NEAR((end_size - start_size) / static_cast(tables.size()), 0.01, 0.005); - EXPECT_EQ(observed_checksums.size(), 5); + ASSERT_EQ(observed_checksums.size(), 5); for (const auto& [_, count] : observed_checksums) { EXPECT_NEAR((100 * count) / static_cast(tables.size()), 0.2, 0.05); } - EXPECT_EQ(reservations.size(), 10); + ASSERT_EQ(reservations.size(), 10); for (const auto& [reservation, count] : reservations) { EXPECT_GE(reservation, 0); EXPECT_LT(reservation, 100); @@ -4057,7 +4057,7 @@ TEST(Table, GrowExtremelyLargeTable) { CommonFields& common = RawHashSetTestOnlyAccess::GetCommon(t); // Set 0 seed so that H1 is always 0. common.set_no_seed_for_testing(); - ASSERT_EQ(H1(t.hash_function()(75), common.seed()), 0); + ASSERT_EQ(H1(t.hash_function()(75)), 0); uint8_t inserted_till = 210; for (uint8_t i = 0; i < inserted_till; ++i) { t.insert(i); @@ -4081,6 +4081,16 @@ TEST(Table, GrowExtremelyLargeTable) { EXPECT_EQ(t.capacity(), kTargetCapacity); } +// Test that after calling generate_new_seed(), the high bits of the returned +// seed are non-zero. +TEST(PerTableSeed, HighBitsAreNonZero) { + HashtableSize hs(no_seed_empty_tag_t{}); + for (int i = 0; i < 100; ++i) { + hs.generate_new_seed(); + ASSERT_GT(hs.seed().seed() >> 16, 0); + } +} + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/absl/container/node_hash_map.h b/absl/container/node_hash_map.h index 5f6be95acb9..46faa8951c6 100644 --- a/absl/container/node_hash_map.h +++ b/absl/container/node_hash_map.h @@ -663,10 +663,10 @@ class NodeHashMapPolicy static Value& value(value_type* elem) { return elem->second; } static const Value& value(const value_type* elem) { return elem->second; } - template + template static constexpr HashSlotFn get_hash_slot_fn() { return memory_internal::IsLayoutCompatible::value - ? &TypeErasedDerefAndApplyToSlotFn + ? 
&TypeErasedDerefAndApplyToSlotFn : nullptr; } }; diff --git a/absl/container/node_hash_set.h b/absl/container/node_hash_set.h index 127c64008ba..9eef870ff81 100644 --- a/absl/container/node_hash_set.h +++ b/absl/container/node_hash_set.h @@ -557,9 +557,9 @@ struct NodeHashSetPolicy static size_t element_space_used(const T*) { return sizeof(T); } - template + template static constexpr HashSlotFn get_hash_slot_fn() { - return &TypeErasedDerefAndApplyToSlotFn; + return &TypeErasedDerefAndApplyToSlotFn; } }; } // namespace container_internal diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 9eacccdd430..3e27d3fba03 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -353,6 +353,15 @@ struct CombineRaw { } }; +// For use in `raw_hash_set` to pass a seed to the hash function. +struct HashWithSeed { + template + size_t hash(const Hasher& hasher, const T& value, size_t seed) const { + // NOLINTNEXTLINE(clang-diagnostic-sign-conversion) + return hasher.hash_with_seed(value, seed); + } +}; + // Convenience function that combines `hash_state` with the byte representation // of `value`. template { } using MixingHashState::HashStateBase::combine_contiguous; + template + static size_t hash(const T& value) { + return hash_with_seed(value, Seed()); + } + // For performance reasons in non-opt mode, we specialize this for // integral types. // Otherwise we would be instantiating and calling dozens of functions for // something that is just one multiplication and a couple xor's. // The result should be the same as running the whole algorithm, but faster. 
template ::value, int> = 0> - static size_t hash(T value) { + static size_t hash_with_seed(T value, size_t seed) { return static_cast( - Mix(Seed() ^ static_cast>(value), kMul)); + Mix(seed ^ static_cast>(value), kMul)); } template ::value, int> = 0> - static size_t hash(const T& value) { - return static_cast(combine(MixingHashState{}, value).state_); + static size_t hash_with_seed(const T& value, size_t seed) { + return static_cast(combine(MixingHashState{seed}, value).state_); } private: @@ -1366,6 +1380,13 @@ struct HashImpl { size_t operator()(const T& value) const { return MixingHashState::hash(value); } + + private: + friend struct HashWithSeed; + + size_t hash_with_seed(const T& value, size_t seed) const { + return MixingHashState::hash_with_seed(value, seed); + } }; template From 23d40c5dbdef4ffb8d53860ff9e6d81c57476eab Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 17 Jun 2025 10:13:19 -0700 Subject: [PATCH 079/107] Automated Code Change PiperOrigin-RevId: 772521627 Change-Id: Icb831a39f266e0dfa70a1f2f0f1dd667414ef663 --- absl/strings/str_split.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index 7e8e31c3d8b..761568a31b1 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -277,7 +277,7 @@ template class MaxSplitsImpl { public: MaxSplitsImpl(Delimiter delimiter, int limit) - : delimiter_(delimiter), limit_(limit), count_(0) {} + : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {} absl::string_view Find(absl::string_view text, size_t pos) { if (count_++ == limit_) { return absl::string_view(text.data() + text.size(), From cfe791308d7a8d30dcdb199ddbac61382669b433 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Wed, 18 Jun 2025 09:28:35 -0700 Subject: [PATCH 080/107] Make bool true hash be ~size_t{} instead of 1 so that all bits are different between true/false instead of only one. 
PiperOrigin-RevId: 772950705 Change-Id: I9ac8aa01c87684544b1d21bb7237680711ba64b0 --- absl/hash/internal/hash.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 3e27d3fba03..96f4900424a 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -409,8 +409,10 @@ H hash_weakly_mixed_integer(H hash_state, WeaklyMixedInteger value) { template typename std::enable_if::value, H>::type AbslHashValue( H hash_state, B value) { + // We use ~size_t{} instead of 1 so that all bits are different between + // true/false instead of only 1. return H::combine(std::move(hash_state), - static_cast(value ? 1 : 0)); + static_cast(value ? ~size_t{} : 0)); } // AbslHashValue() for hashing enum values From 6ee80c1d4ec5eb89e0c4879160294079267bdb3d Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Wed, 18 Jun 2025 11:30:06 -0700 Subject: [PATCH 081/107] CMake: Add a fatal error when the compiler defaults to or is set to a C++ language standard prior to C++17. Users encountering this error should set -DCMAKE_CXX_STANDARD=17 (or a higher standard), or find an equivalent method of specifying the correct C++ standard for the entire build. The current behavior is to try to detect the standard being used and upgrade it to C++17 if it defaults to or is set to a prior version. However, CMake also generates options.h for installation, and it does this without the automatic upgrade, which leads to an inconsistency. Another possible fix is to not do the automatic upgrade at all, but then we fail the build at a later step, so better just to add an error earlier in the process. 
PiperOrigin-RevId: 772997895 Change-Id: I5ace8ecf5799cacf6010bbba4d880004e0bc9650 --- CMake/AbseilDll.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index 9f88825e144..0ebeebb4cef 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -716,8 +716,10 @@ int main() { return 0; } if(ABSL_INTERNAL_AT_LEAST_CXX20) set(ABSL_INTERNAL_CXX_STD_FEATURE cxx_std_20) -else() +elseif(ABSL_INTERNAL_AT_LEAST_CXX17) set(ABSL_INTERNAL_CXX_STD_FEATURE cxx_std_17) +else() + message(FATAL_ERROR "The compiler defaults to or is configured for C++ < 17. C++ >= 17 is required and Abseil and all libraries that use Abseil must use the same C++ language standard") endif() function(absl_internal_dll_contains) From 278d661fb6a117497cd2892ef10c794cded88e36 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 19 Jun 2025 02:57:07 -0700 Subject: [PATCH 082/107] Change return type of hash internal `Seed` to `size_t` from `uint64_t` To avoid shorten-64-to-32 warning on 32-bit platforms. The implementation of this function already generates uintptr_t, which would be 32-bit on those platforms PiperOrigin-RevId: 773273176 Change-Id: Id0c502c482fbfe552310fd5047882a026638c0d0 --- absl/hash/internal/hash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 96f4900424a..3af6c07d25a 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -1351,15 +1351,15 @@ class ABSL_DLL MixingHashState : public HashStateBase { // // On other platforms this is still going to be non-deterministic but most // probably per-build and not per-process. 
- ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Seed() { + ABSL_ATTRIBUTE_ALWAYS_INLINE static size_t Seed() { #if (!defined(__clang__) || __clang_major__ > 11) && \ (!defined(__apple_build_version__) || \ __apple_build_version__ >= 19558921) // Xcode 12 - return static_cast(reinterpret_cast(&kSeed)); + return static_cast(reinterpret_cast(&kSeed)); #else // Workaround the absence of // https://github.com/llvm/llvm-project/commit/bc15bf66dcca76cc06fe71fca35b74dc4d521021. - return static_cast(reinterpret_cast(kSeed)); + return static_cast(reinterpret_cast(kSeed)); #endif } From 212fcb96c8a5218e652b8502f297d236d7fbe3af Mon Sep 17 00:00:00 2001 From: Tomas Dzetkulic Date: Thu, 19 Jun 2025 06:42:48 -0700 Subject: [PATCH 083/107] Change DurationFromDouble to return -InfiniteDuration() for all NaNs. If NaN is produced in the arithmetic operation than the sign bit is implementation-defined and mostly irrelevant. In particular Arm and x86 produce different results. There is no good reason to produce different results based on the sign of NaN. PiperOrigin-RevId: 773329134 Change-Id: I8df42e90a677402550ad76f0d3dde8521e4b7725 --- absl/time/time.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/absl/time/time.h b/absl/time/time.h index 53bca90d156..29beaac19af 100644 --- a/absl/time/time.h +++ b/absl/time/time.h @@ -589,9 +589,10 @@ ABSL_ATTRIBUTE_CONST_FUNCTION Duration Seconds(T n) { } return time_internal::MakePosDoubleDuration(n); } else { - if (std::isnan(n)) - return std::signbit(n) ? 
-InfiniteDuration() : InfiniteDuration(); - if (n <= (std::numeric_limits::min)()) return -InfiniteDuration(); + if (std::isnan(n)) return -InfiniteDuration(); + if (n <= static_cast((std::numeric_limits::min)())) { + return -InfiniteDuration(); + } return -time_internal::MakePosDoubleDuration(-n); } } From 9e7f3c020b6727e31bcce7086f9c74c33ae692be Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Fri, 20 Jun 2025 09:54:09 -0700 Subject: [PATCH 084/107] Improve mixing on 32-bit platforms. The previous version of Mix doesn't have any entropy from the high bits of the input in the low bits of the output. This can be a problem for 64-bit types whose high bits are salient, such as double. Motivation: when adding debug asserts to detect cases of long probe sequences in absl hash tables, we see assertion failures for the case of inserting [-1024, 1024] into flat_hash_set in 32-bit architectures. PiperOrigin-RevId: 773741924 Change-Id: I5b718cec08f175376bf46adbec53500956c392cf --- absl/container/flat_hash_set_test.cc | 15 +++++++++++++++ absl/hash/internal/hash.h | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/absl/container/flat_hash_set_test.cc b/absl/container/flat_hash_set_test.cc index ca069b402ad..f2c3fbd3fd8 100644 --- a/absl/container/flat_hash_set_test.cc +++ b/absl/container/flat_hash_set_test.cc @@ -397,6 +397,21 @@ TEST(FlatHashSet, IsDefaultHash) { false); } +// Test that we don't cause excessive collisions on the hash table for +// doubles in the range [-1024, 1024]. See cl/773069881 for more information. 
+TEST(FlatHashSet, DoubleRange) { + using absl::container_internal::hashtable_debug_internal:: + HashtableDebugAccess; + absl::flat_hash_set set; + for (double t = -1024.0; t < 1024.0; t += 1.0) { + set.insert(t); + } + for (double t = -1024.0; t < 1024.0; t += 1.0) { + ASSERT_LT(HashtableDebugAccess::GetNumProbes(set, t), 64) + << t; + } +} + } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 3af6c07d25a..dc374055121 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -963,7 +963,7 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t Mix(uint64_t lhs, uint64_t rhs) { // For 32 bit platforms we are trying to use all 64 lower bits. if constexpr (sizeof(size_t) < 8) { uint64_t m = lhs * rhs; - return m ^ (m >> 32); + return m ^ absl::byteswap(m); } // absl::uint128 is not an alias or a thin wrapper around the intrinsic. // We use the intrinsic when available to improve performance. From f60bfd822e5be101269561043eeea5f1937392cb Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 20 Jun 2025 10:12:46 -0700 Subject: [PATCH 085/107] Enable SIMD memcpy-crc on ARM cores. PiperOrigin-RevId: 773749299 Change-Id: I798913549298c0993af16fc3ab6215089aab1f18 --- absl/crc/internal/crc_memcpy_x86_arm_combined.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc index 38f61e9b1fd..247b3aa9d1f 100644 --- a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc +++ b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc @@ -422,6 +422,11 @@ CrcMemcpy::ArchSpecificEngines CrcMemcpy::GetArchSpecificEngines() { }; // INTEL_SANDYBRIDGE performs better with SSE than AVX. case CpuType::kIntelSandybridge: + // Use SIMD memcpy on ARM cores. 
+ case CpuType::kArmNeoverseN1: + case CpuType::kArmNeoverseN2: + case CpuType::kArmNeoverseV1: + case CpuType::kArmNeoverseV2: return { /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(), /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(), From b40953d88697d4c6daeb66e7502a9abad8f29779 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 23 Jun 2025 03:04:04 -0700 Subject: [PATCH 086/107] Enable `operator==` for `StatusOr` only if the contained type is equality-comparable PiperOrigin-RevId: 774693039 Change-Id: I915ce87aa37094d1596618cf2604d0bd98583218 --- absl/status/internal/statusor_internal.h | 10 ++++++++++ absl/status/statusor.h | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/absl/status/internal/statusor_internal.h b/absl/status/internal/statusor_internal.h index e986611396e..029fdeea385 100644 --- a/absl/status/internal/statusor_internal.h +++ b/absl/status/internal/statusor_internal.h @@ -46,6 +46,16 @@ template struct HasConversionOperatorToStatusOr(0))> : std::true_type {}; +// Detects whether `T` is equality-comparable. +template +struct IsEqualityComparable : std::false_type {}; + +template +struct IsEqualityComparable< + T, std::enable_if_t() == std::declval()), + bool>::value>> : std::true_type {}; + // Detects whether `T` is constructible or convertible from `StatusOr`. template using IsConstructibleOrConvertibleFromStatusOr = diff --git a/absl/status/statusor.h b/absl/status/statusor.h index 6142a2f8dd7..25c62887bf7 100644 --- a/absl/status/statusor.h +++ b/absl/status/statusor.h @@ -607,7 +607,9 @@ class StatusOr : private internal_statusor::StatusOrData, // operator==() // // This operator checks the equality of two `absl::StatusOr` objects. 
-template +template ::value, + int> = 0> bool operator==(const StatusOr& lhs, const StatusOr& rhs) { if (lhs.ok() && rhs.ok()) return *lhs == *rhs; return lhs.status() == rhs.status(); @@ -616,7 +618,9 @@ bool operator==(const StatusOr& lhs, const StatusOr& rhs) { // operator!=() // // This operator checks the inequality of two `absl::StatusOr` objects. -template +template ::value, + int> = 0> bool operator!=(const StatusOr& lhs, const StatusOr& rhs) { return !(lhs == rhs); } From 2b320cbfaa4e6e6ee0d04ab120e1224eabb349a6 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 23 Jun 2025 10:31:23 -0700 Subject: [PATCH 087/107] Fix includes and fuse constructors of SpinLock. PiperOrigin-RevId: 774835829 Change-Id: I0fa7cab1b98c1b7222de0acd71b7846df693f1e2 --- absl/base/internal/spinlock.cc | 34 +++++------ absl/base/internal/spinlock.h | 51 +++++++++++----- absl/base/internal/thread_identity_test.cc | 2 +- absl/base/spinlock_test_common.cc | 53 ++++++++++------ absl/debugging/symbolize_elf.inc | 23 +++---- absl/log/internal/vlog_config.cc | 2 +- absl/strings/internal/cordz_info.h | 4 +- .../internal/create_thread_identity.cc | 2 +- absl/synchronization/internal/graphcycles.cc | 60 +++++++++---------- absl/synchronization/mutex.cc | 17 +++--- absl/time/clock.cc | 21 ++++--- 11 files changed, 151 insertions(+), 118 deletions(-) diff --git a/absl/base/internal/spinlock.cc b/absl/base/internal/spinlock.cc index 430f775bdf9..4168b8b728a 100644 --- a/absl/base/internal/spinlock.cc +++ b/absl/base/internal/spinlock.cc @@ -16,15 +16,18 @@ #include #include +#include #include #include "absl/base/attributes.h" +#include "absl/base/call_once.h" #include "absl/base/config.h" #include "absl/base/internal/atomic_hook.h" #include "absl/base/internal/cycleclock.h" +#include "absl/base/internal/scheduling_mode.h" #include "absl/base/internal/spinlock_wait.h" #include "absl/base/internal/sysinfo.h" /* For NumCPUs() */ -#include "absl/base/call_once.h" +#include 
"absl/base/internal/tsan_mutex_interface.h" // Description of lock-word: // 31..00: [............................3][2][1][0] @@ -58,7 +61,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace base_internal { -ABSL_INTERNAL_ATOMIC_HOOK_ATTRIBUTES static base_internal::AtomicHook submit_profile_data; @@ -67,12 +70,6 @@ void RegisterSpinLockProfiler(void (*fn)(const void *contendedlock, submit_profile_data.Store(fn); } -// Uncommon constructors. -SpinLock::SpinLock(base_internal::SchedulingMode mode) - : lockword_(IsCooperative(mode) ? kSpinLockCooperative : 0) { - ABSL_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static); -} - // Monitor the lock to see if its value changes within some time period // (adaptive_spin_count loop iterations). The last value read from the lock // is returned from the method. @@ -81,9 +78,8 @@ uint32_t SpinLock::SpinLoop() { // adaptive_spin_count here. ABSL_CONST_INIT static absl::once_flag init_adaptive_spin_count; ABSL_CONST_INIT static int adaptive_spin_count = 0; - base_internal::LowLevelCallOnce(&init_adaptive_spin_count, []() { - adaptive_spin_count = base_internal::NumCPUs() > 1 ? 1000 : 1; - }); + LowLevelCallOnce(&init_adaptive_spin_count, + []() { adaptive_spin_count = NumCPUs() > 1 ? 1000 : 1; }); int c = adaptive_spin_count; uint32_t lock_value; @@ -100,11 +96,11 @@ void SpinLock::SlowLock() { return; } - base_internal::SchedulingMode scheduling_mode; + SchedulingMode scheduling_mode; if ((lock_value & kSpinLockCooperative) != 0) { - scheduling_mode = base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL; + scheduling_mode = SCHEDULE_COOPERATIVE_AND_KERNEL; } else { - scheduling_mode = base_internal::SCHEDULE_KERNEL_ONLY; + scheduling_mode = SCHEDULE_KERNEL_ONLY; } // The lock was not obtained initially, so this thread needs to wait for @@ -134,7 +130,7 @@ void SpinLock::SlowLock() { // new lock state will be the number of cycles this thread waited if // this thread obtains the lock. 
lock_value = TryLockInternal(lock_value, wait_cycles); - continue; // Skip the delay at the end of the loop. + continue; // Skip the delay at the end of the loop. } else if ((lock_value & kWaitTimeMask) == 0) { // The lock is still held, without a waiter being marked, but something // else about the lock word changed, causing our CAS to fail. For @@ -150,8 +146,8 @@ void SpinLock::SlowLock() { // synchronization there to avoid false positives. ABSL_TSAN_MUTEX_PRE_DIVERT(this, 0); // Wait for an OS specific delay. - base_internal::SpinLockDelay(&lockword_, lock_value, ++lock_wait_call_count, - scheduling_mode); + SpinLockDelay(&lockword_, lock_value, ++lock_wait_call_count, + scheduling_mode); ABSL_TSAN_MUTEX_POST_DIVERT(this, 0); // Spin again after returning from the wait routine to give this thread // some chance of obtaining the lock. @@ -162,8 +158,8 @@ void SpinLock::SlowLock() { } void SpinLock::SlowUnlock(uint32_t lock_value) { - base_internal::SpinLockWake(&lockword_, - false); // wake waiter if necessary + SpinLockWake(&lockword_, + false); // wake waiter if necessary // If our acquisition was contended, collect contentionz profile info. We // reserve a unitary wait time to represent that a waiter exists without our diff --git a/absl/base/internal/spinlock.h b/absl/base/internal/spinlock.h index 2a108969767..022dcf2373b 100644 --- a/absl/base/internal/spinlock.h +++ b/absl/base/internal/spinlock.h @@ -19,7 +19,7 @@ // - for use by Abseil internal code that Mutex itself depends on // - for async signal safety (see below) -// SpinLock with a base_internal::SchedulingMode::SCHEDULE_KERNEL_ONLY is async +// SpinLock with a SchedulingMode::SCHEDULE_KERNEL_ONLY is async // signal safe. If a spinlock is used within a signal handler, all code that // acquires the lock must ensure that the signal cannot arrive while they are // holding the lock. Typically, this is done by blocking the signal. 
@@ -31,14 +31,16 @@ #include #include +#include #include "absl/base/attributes.h" +#include "absl/base/config.h" #include "absl/base/const_init.h" -#include "absl/base/dynamic_annotations.h" #include "absl/base/internal/low_level_scheduling.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/scheduling_mode.h" #include "absl/base/internal/tsan_mutex_interface.h" +#include "absl/base/macros.h" #include "absl/base/thread_annotations.h" namespace tcmalloc { @@ -55,17 +57,31 @@ namespace base_internal { class ABSL_LOCKABLE ABSL_ATTRIBUTE_WARN_UNUSED SpinLock { public: - SpinLock() : lockword_(kSpinLockCooperative) { - ABSL_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static); - } + constexpr SpinLock() : lockword_(kSpinLockCooperative) { RegisterWithTsan(); } // Constructors that allow non-cooperative spinlocks to be created for use // inside thread schedulers. Normal clients should not use these. - explicit SpinLock(base_internal::SchedulingMode mode); + constexpr explicit SpinLock(SchedulingMode mode) + : lockword_(IsCooperative(mode) ? kSpinLockCooperative : 0) { + RegisterWithTsan(); + } + +#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(_WIN32) + // Constructor to inline users of the default scheduling mode. + // + // This only needs to exists for inliner runs, but doesn't work correctly in + // clang+windows builds, likely due to mangling differences. + ABSL_DEPRECATE_AND_INLINE() + constexpr explicit SpinLock(SchedulingMode mode) + __attribute__((enable_if(mode == SCHEDULE_COOPERATIVE_AND_KERNEL, + "Cooperative use default constructor"))) + : SpinLock() {} +#endif // Constructor for global SpinLock instances. See absl/base/const_init.h. - constexpr SpinLock(absl::ConstInitType, base_internal::SchedulingMode mode) - : lockword_(IsCooperative(mode) ? 
kSpinLockCooperative : 0) {} + ABSL_DEPRECATE_AND_INLINE() + constexpr SpinLock(absl::ConstInitType, SchedulingMode mode) + : SpinLock(mode) {} // For global SpinLock instances prefer trivial destructor when possible. // Default but non-trivial destructor in some build configurations causes an @@ -106,7 +122,7 @@ class ABSL_LOCKABLE ABSL_ATTRIBUTE_WARN_UNUSED SpinLock { std::memory_order_release); if ((lock_value & kSpinLockDisabledScheduling) != 0) { - base_internal::SchedulingGuard::EnableRescheduling(true); + SchedulingGuard::EnableRescheduling(true); } if ((lock_value & kWaitTimeMask) != 0) { // Collect contentionz profile info, and speed the wakeup of any waiter. @@ -175,9 +191,16 @@ class ABSL_LOCKABLE ABSL_ATTRIBUTE_WARN_UNUSED SpinLock { ~(kSpinLockHeld | kSpinLockCooperative | kSpinLockDisabledScheduling); // Returns true if the provided scheduling mode is cooperative. - static constexpr bool IsCooperative( - base_internal::SchedulingMode scheduling_mode) { - return scheduling_mode == base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL; + static constexpr bool IsCooperative(SchedulingMode scheduling_mode) { + return scheduling_mode == SCHEDULE_COOPERATIVE_AND_KERNEL; + } + + constexpr void RegisterWithTsan() { +#if ABSL_HAVE_BUILTIN(__builtin_is_constant_evaluated) + if (!__builtin_is_constant_evaluated()) { + ABSL_TSAN_MUTEX_CREATE(this, __tsan_mutex_not_static); + } +#endif } bool IsCooperative() const { @@ -243,7 +266,7 @@ inline uint32_t SpinLock::TryLockInternal(uint32_t lock_value, if ((lock_value & kSpinLockCooperative) == 0) { // For non-cooperative locks we must make sure we mark ourselves as // non-reschedulable before we attempt to CompareAndSwap. 
- if (base_internal::SchedulingGuard::DisableRescheduling()) { + if (SchedulingGuard::DisableRescheduling()) { sched_disabled_bit = kSpinLockDisabledScheduling; } } @@ -252,7 +275,7 @@ inline uint32_t SpinLock::TryLockInternal(uint32_t lock_value, lock_value, kSpinLockHeld | lock_value | wait_cycles | sched_disabled_bit, std::memory_order_acquire, std::memory_order_relaxed)) { - base_internal::SchedulingGuard::EnableRescheduling(sched_disabled_bit != 0); + SchedulingGuard::EnableRescheduling(sched_disabled_bit != 0); } return lock_value; diff --git a/absl/base/internal/thread_identity_test.cc b/absl/base/internal/thread_identity_test.cc index 5f17553e648..40522911d5d 100644 --- a/absl/base/internal/thread_identity_test.cc +++ b/absl/base/internal/thread_identity_test.cc @@ -31,7 +31,7 @@ namespace base_internal { namespace { ABSL_CONST_INIT static absl::base_internal::SpinLock map_lock( - absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY); + base_internal::SCHEDULE_KERNEL_ONLY); ABSL_CONST_INIT static int num_identities_reused ABSL_GUARDED_BY(map_lock); static const void* const kCheckNoIdentity = reinterpret_cast(1); diff --git a/absl/base/spinlock_test_common.cc b/absl/base/spinlock_test_common.cc index e9047158cbf..dd8b3cb5e49 100644 --- a/absl/base/spinlock_test_common.cc +++ b/absl/base/spinlock_test_common.cc @@ -60,24 +60,41 @@ namespace { static constexpr size_t kArrayLength = 10; static uint32_t values[kArrayLength]; -ABSL_CONST_INIT static SpinLock static_cooperative_spinlock( - absl::kConstInit, base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL); +ABSL_CONST_INIT static SpinLock static_cooperative_spinlock; ABSL_CONST_INIT static SpinLock static_noncooperative_spinlock( - absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY); + base_internal::SCHEDULE_KERNEL_ONLY); // Simple integer hash function based on the public domain lookup2 hash. 
// http://burtleburtle.net/bob/c/lookup2.c static uint32_t Hash32(uint32_t a, uint32_t c) { uint32_t b = 0x9e3779b9UL; // The golden ratio; an arbitrary value. - a -= b; a -= c; a ^= (c >> 13); - b -= c; b -= a; b ^= (a << 8); - c -= a; c -= b; c ^= (b >> 13); - a -= b; a -= c; a ^= (c >> 12); - b -= c; b -= a; b ^= (a << 16); - c -= a; c -= b; c ^= (b >> 5); - a -= b; a -= c; a ^= (c >> 3); - b -= c; b -= a; b ^= (a << 10); - c -= a; c -= b; c ^= (b >> 15); + a -= b; + a -= c; + a ^= (c >> 13); + b -= c; + b -= a; + b ^= (a << 8); + c -= a; + c -= b; + c ^= (b >> 13); + a -= b; + a -= c; + a ^= (c >> 12); + b -= c; + b -= a; + b ^= (a << 16); + c -= a; + c -= b; + c ^= (b >> 5); + a -= b; + a -= c; + a ^= (c >> 3); + b -= c; + b -= a; + b ^= (a << 10); + c -= a; + c -= b; + c ^= (b >> 15); return c; } @@ -134,7 +151,7 @@ TEST(SpinLock, WaitCyclesEncoding) { // We should be able to encode up to (1^kMaxCycleBits - 1) without clamping // but the lower kProfileTimestampShift will be dropped. const int kMaxCyclesShift = - 32 - kLockwordReservedShift + kProfileTimestampShift; + 32 - kLockwordReservedShift + kProfileTimestampShift; const int64_t kMaxCycles = (int64_t{1} << kMaxCyclesShift) - 1; // These bits should be zero after encoding. @@ -171,22 +188,22 @@ TEST(SpinLock, WaitCyclesEncoding) { SpinLockTest::DecodeWaitCycles(~kLockwordReservedMask)); // Check that we cannot produce kSpinLockSleeper during encoding. 
- int64_t sleeper_cycles = - kSpinLockSleeper << (kProfileTimestampShift - kLockwordReservedShift); + int64_t sleeper_cycles = kSpinLockSleeper + << (kProfileTimestampShift - kLockwordReservedShift); uint32_t sleeper_value = SpinLockTest::EncodeWaitCycles(start_time, start_time + sleeper_cycles); EXPECT_NE(sleeper_value, kSpinLockSleeper); // Test clamping uint32_t max_value = - SpinLockTest::EncodeWaitCycles(start_time, start_time + kMaxCycles); + SpinLockTest::EncodeWaitCycles(start_time, start_time + kMaxCycles); int64_t max_value_decoded = SpinLockTest::DecodeWaitCycles(max_value); int64_t expected_max_value_decoded = kMaxCycles & ~kProfileTimestampMask; EXPECT_EQ(expected_max_value_decoded, max_value_decoded); const int64_t step = (1 << kProfileTimestampShift); - uint32_t after_max_value = - SpinLockTest::EncodeWaitCycles(start_time, start_time + kMaxCycles + step); + uint32_t after_max_value = SpinLockTest::EncodeWaitCycles( + start_time, start_time + kMaxCycles + step); int64_t after_max_value_decoded = SpinLockTest::DecodeWaitCycles(after_max_value); EXPECT_EQ(expected_max_value_decoded, after_max_value_decoded); diff --git a/absl/debugging/symbolize_elf.inc b/absl/debugging/symbolize_elf.inc index 9836c93295c..17baff4ac69 100644 --- a/absl/debugging/symbolize_elf.inc +++ b/absl/debugging/symbolize_elf.inc @@ -171,18 +171,18 @@ struct FileMappingHint { // is being modified (is busy), we skip all decorators, and possibly // loose some info. Sorry, that's the best we could do. ABSL_CONST_INIT absl::base_internal::SpinLock g_decorators_mu( - absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + absl::base_internal::SCHEDULE_KERNEL_ONLY); const int kMaxFileMappingHints = 8; int g_num_file_mapping_hints; FileMappingHint g_file_mapping_hints[kMaxFileMappingHints]; // Protects g_file_mapping_hints. 
ABSL_CONST_INIT absl::base_internal::SpinLock g_file_mapping_mu( - absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + absl::base_internal::SCHEDULE_KERNEL_ONLY); // Async-signal-safe function to zero a buffer. // memset() is not guaranteed to be async-signal-safe. -static void SafeMemZero(void* p, size_t size) { +static void SafeMemZero(void *p, size_t size) { unsigned char *c = static_cast(p); while (size--) { *c++ = 0; @@ -1469,14 +1469,15 @@ static bool MaybeInitializeObjFile(ObjFile *obj) { constexpr int interesting = PF_X | PF_R; #endif - if (phdr.p_type != PT_LOAD - || (phdr.p_flags & interesting) != interesting) { + if (phdr.p_type != PT_LOAD || + (phdr.p_flags & interesting) != interesting) { // Not a LOAD segment, not executable code, and not a function // descriptor. continue; } if (num_interesting_load_segments < obj->phdr.size()) { - memcpy(&obj->phdr[num_interesting_load_segments++], &phdr, sizeof(phdr)); + memcpy(&obj->phdr[num_interesting_load_segments++], &phdr, + sizeof(phdr)); } else { ABSL_RAW_LOG( WARNING, "%s: too many interesting LOAD segments: %zu >= %zu", @@ -1525,7 +1526,8 @@ const char *Symbolizer::GetUncachedSymbol(const void *pc) { ABSL_RAW_CHECK(p.p_type == PT_NULL, "unexpected p_type"); break; } - if (pc < reinterpret_cast(start_addr + p.p_vaddr + p.p_memsz)) { + if (pc < + reinterpret_cast(start_addr + p.p_vaddr + p.p_memsz)) { phdr = &p; break; } @@ -1671,8 +1673,8 @@ int InstallSymbolDecorator(SymbolDecorator decorator, void *arg) { return ret; } -bool RegisterFileMappingHint(const void *start, const void *end, uint64_t offset, - const char *filename) { +bool RegisterFileMappingHint(const void *start, const void *end, + uint64_t offset, const char *filename) { SAFE_ASSERT(start <= end); SAFE_ASSERT(filename != nullptr); @@ -1765,7 +1767,8 @@ ABSL_NAMESPACE_END } // namespace absl extern "C" bool AbslInternalGetFileMappingHint(const void **start, - const void **end, uint64_t *offset, + const void **end, + uint64_t 
*offset, const char **filename) { return absl::debugging_internal::GetFileMappingHint(start, end, offset, filename); diff --git a/absl/log/internal/vlog_config.cc b/absl/log/internal/vlog_config.cc index f7c61bed52a..040038fad28 100644 --- a/absl/log/internal/vlog_config.cc +++ b/absl/log/internal/vlog_config.cc @@ -90,7 +90,7 @@ struct VModuleInfo final { // To avoid problems with the heap checker which calls into `VLOG`, `mutex` must // be a `SpinLock` that prevents fiber scheduling instead of a `Mutex`. ABSL_CONST_INIT absl::base_internal::SpinLock mutex( - absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + absl::base_internal::SCHEDULE_KERNEL_ONLY); // `GetUpdateSitesMutex()` serializes updates to all of the sites (i.e. those in // `site_list_head`) themselves. diff --git a/absl/strings/internal/cordz_info.h b/absl/strings/internal/cordz_info.h index 2dc9d16def0..0091fa2ea2b 100644 --- a/absl/strings/internal/cordz_info.h +++ b/absl/strings/internal/cordz_info.h @@ -191,9 +191,7 @@ class ABSL_LOCKABLE CordzInfo : public CordzHandle { // Global cordz info list. CordzInfo stores a pointer to the global list // instance to harden against ODR violations. struct List { - constexpr explicit List(absl::ConstInitType) - : mutex(absl::kConstInit, - absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + constexpr explicit List(absl::ConstInitType) {} SpinLock mutex; std::atomic head ABSL_GUARDED_BY(mutex){nullptr}; diff --git a/absl/synchronization/internal/create_thread_identity.cc b/absl/synchronization/internal/create_thread_identity.cc index 93cd376bde0..2ec8075019a 100644 --- a/absl/synchronization/internal/create_thread_identity.cc +++ b/absl/synchronization/internal/create_thread_identity.cc @@ -35,7 +35,7 @@ namespace synchronization_internal { // ThreadIdentity storage is persistent, we maintain a free-list of previously // released ThreadIdentity objects. 
ABSL_CONST_INIT static base_internal::SpinLock freelist_lock( - absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY); + base_internal::SCHEDULE_KERNEL_ONLY); ABSL_CONST_INIT static base_internal::ThreadIdentity* thread_identity_freelist; // A per-thread destructor for reclaiming associated ThreadIdentity objects. diff --git a/absl/synchronization/internal/graphcycles.cc b/absl/synchronization/internal/graphcycles.cc index 129067c1515..914c81cd6fa 100644 --- a/absl/synchronization/internal/graphcycles.cc +++ b/absl/synchronization/internal/graphcycles.cc @@ -33,15 +33,15 @@ #include "absl/base/internal/low_level_alloc.h" #ifndef ABSL_LOW_LEVEL_ALLOC_MISSING -#include "absl/synchronization/internal/graphcycles.h" - #include #include #include #include + #include "absl/base/internal/hide_ptr.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/spinlock.h" +#include "absl/synchronization/internal/graphcycles.h" // Do not use STL. This module does not use standard memory allocation. @@ -54,7 +54,7 @@ namespace { // Avoid LowLevelAlloc's default arena since it calls malloc hooks in // which people are doing things like acquiring Mutexes. ABSL_CONST_INIT static absl::base_internal::SpinLock arena_mu( - absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY); + base_internal::SCHEDULE_KERNEL_ONLY); ABSL_CONST_INIT static base_internal::LowLevelAlloc::Arena* arena; static void InitArenaIfNecessary() { @@ -89,7 +89,7 @@ class Vec { T* end() { return ptr_ + size_; } const T& operator[](uint32_t i) const { return ptr_[i]; } T& operator[](uint32_t i) { return ptr_[i]; } - const T& back() const { return ptr_[size_-1]; } + const T& back() const { return ptr_[size_ - 1]; } void pop_back() { size_--; } void push_back(const T& v) { @@ -178,7 +178,7 @@ class NodeSet { } table_[i] = v; // Double when 75% full. 
- if (occupied_ >= table_.size() - table_.size()/4) Grow(); + if (occupied_ >= table_.size() - table_.size() / 4) Grow(); return true; } @@ -193,7 +193,7 @@ class NodeSet { // Example: // HASH_FOR_EACH(elem, node->out) { ... } #define HASH_FOR_EACH(elem, eset) \ - for (int32_t elem, _cursor = 0; (eset).Next(&_cursor, &elem); ) + for (int32_t elem, _cursor = 0; (eset).Next(&_cursor, &elem);) bool Next(int32_t* cursor, int32_t* elem) { while (static_cast(*cursor) < table_.size()) { int32_t v = table_[static_cast(*cursor)]; @@ -209,7 +209,7 @@ class NodeSet { private: enum : int32_t { kEmpty = -1, kDel = -2 }; Vec table_; - uint32_t occupied_; // Count of non-empty slots (includes deleted slots) + uint32_t occupied_; // Count of non-empty slots (includes deleted slots) static uint32_t Hash(int32_t a) { return static_cast(a) * 41; } @@ -270,25 +270,23 @@ inline GraphId MakeId(int32_t index, uint32_t version) { return g; } -inline int32_t NodeIndex(GraphId id) { - return static_cast(id.handle); -} +inline int32_t NodeIndex(GraphId id) { return static_cast(id.handle); } inline uint32_t NodeVersion(GraphId id) { return static_cast(id.handle >> 32); } struct Node { - int32_t rank; // rank number assigned by Pearce-Kelly algorithm - uint32_t version; // Current version number - int32_t next_hash; // Next entry in hash table - bool visited; // Temporary marker used by depth-first-search - uintptr_t masked_ptr; // User-supplied pointer - NodeSet in; // List of immediate predecessor nodes in graph - NodeSet out; // List of immediate successor nodes in graph - int priority; // Priority of recorded stack trace. - int nstack; // Depth of recorded stack trace. - void* stack[40]; // stack[0,nstack-1] holds stack trace for node. 
+ int32_t rank; // rank number assigned by Pearce-Kelly algorithm + uint32_t version; // Current version number + int32_t next_hash; // Next entry in hash table + bool visited; // Temporary marker used by depth-first-search + uintptr_t masked_ptr; // User-supplied pointer + NodeSet in; // List of immediate predecessor nodes in graph + NodeSet out; // List of immediate successor nodes in graph + int priority; // Priority of recorded stack trace. + int nstack; // Depth of recorded stack trace. + void* stack[40]; // stack[0,nstack-1] holds stack trace for node. }; // Hash table for pointer to node index lookups. @@ -318,7 +316,7 @@ class PointerMap { // Advance through linked list while keeping track of the // predecessor slot that points to the current entry. auto masked = base_internal::HidePtr(ptr); - for (int32_t* slot = &table_[Hash(ptr)]; *slot != -1; ) { + for (int32_t* slot = &table_[Hash(ptr)]; *slot != -1;) { int32_t index = *slot; Node* n = (*nodes_)[static_cast(index)]; if (n->masked_ptr == masked) { @@ -381,7 +379,9 @@ GraphCycles::GraphCycles() { GraphCycles::~GraphCycles() { for (auto* node : rep_->nodes_) { - if (node == nullptr) { continue; } + if (node == nullptr) { + continue; + } node->Node::~Node(); base_internal::LowLevelAlloc::Free(node); } @@ -474,8 +474,7 @@ void GraphCycles::RemoveNode(void* ptr) { void* GraphCycles::Ptr(GraphId id) { Node* n = FindNode(rep_, id); - return n == nullptr ? nullptr - : base_internal::UnhidePtr(n->masked_ptr); + return n == nullptr ? 
nullptr : base_internal::UnhidePtr(n->masked_ptr); } bool GraphCycles::HasNode(GraphId node) { @@ -502,8 +501,8 @@ static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound); static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound); static void Reorder(GraphCycles::Rep* r); static void Sort(const Vec&, Vec* delta); -static void MoveToList( - GraphCycles::Rep* r, Vec* src, Vec* dst); +static void MoveToList(GraphCycles::Rep* r, Vec* src, + Vec* dst); bool GraphCycles::InsertEdge(GraphId idx, GraphId idy) { Rep* r = rep_; @@ -605,9 +604,8 @@ static void Reorder(GraphCycles::Rep* r) { // Produce sorted list of all ranks that will be reassigned. r->merged_.resize(r->deltab_.size() + r->deltaf_.size()); - std::merge(r->deltab_.begin(), r->deltab_.end(), - r->deltaf_.begin(), r->deltaf_.end(), - r->merged_.begin()); + std::merge(r->deltab_.begin(), r->deltab_.end(), r->deltaf_.begin(), + r->deltaf_.end(), r->merged_.begin()); // Assign the ranks in order to the collected list. for (uint32_t i = 0; i < r->list_.size(); i++) { @@ -628,8 +626,8 @@ static void Sort(const Vec& nodes, Vec* delta) { std::sort(delta->begin(), delta->end(), cmp); } -static void MoveToList( - GraphCycles::Rep* r, Vec* src, Vec* dst) { +static void MoveToList(GraphCycles::Rep* r, Vec* src, + Vec* dst) { for (auto& v : *src) { int32_t w = v; // Replace v entry with its rank diff --git a/absl/synchronization/mutex.cc b/absl/synchronization/mutex.cc index 5091b8fd340..a4b0f01034e 100644 --- a/absl/synchronization/mutex.cc +++ b/absl/synchronization/mutex.cc @@ -226,7 +226,7 @@ static bool AtomicSetBits(std::atomic* pv, intptr_t bits, // Data for doing deadlock detection. ABSL_CONST_INIT static absl::base_internal::SpinLock deadlock_graph_mu( - absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY); + base_internal::SCHEDULE_KERNEL_ONLY); // Graph used to detect deadlocks. 
ABSL_CONST_INIT static GraphCycles* deadlock_graph @@ -292,7 +292,7 @@ static const struct { }; ABSL_CONST_INIT static absl::base_internal::SpinLock synch_event_mu( - absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY); + base_internal::SCHEDULE_KERNEL_ONLY); // Hash table size; should be prime > 2. // Can't be too small, as it's used for deadlock detection information. @@ -509,10 +509,10 @@ struct SynchWaitParams { const Condition* cond; // The condition that this thread is waiting for. // In Mutex, this field is set to zero if a timeout // expires. - KernelTimeout timeout; // timeout expiry---absolute time - // In Mutex, this field is set to zero if a timeout - // expires. - Mutex* const cvmu; // used for transfer from cond var to mutex + KernelTimeout timeout; // timeout expiry---absolute time + // In Mutex, this field is set to zero if a timeout + // expires. + Mutex* const cvmu; // used for transfer from cond var to mutex PerThreadSynch* const thread; // thread that is waiting // If not null, thread should be enqueued on the CondVar whose state @@ -1327,8 +1327,7 @@ static char* StackString(void** pcs, int n, char* buf, int maxlen, char sym[kSymLen]; int len = 0; for (int i = 0; i != n; i++) { - if (len >= maxlen) - return buf; + if (len >= maxlen) return buf; size_t count = static_cast(maxlen - len); if (symbolize) { if (!absl::Symbolize(pcs[i], sym, kSymLen)) { @@ -2286,7 +2285,7 @@ ABSL_ATTRIBUTE_NOINLINE void Mutex::UnlockSlow(SynchWaitParams* waitp) { // set up to walk the list PerThreadSynch* w_walk; // current waiter during list walk PerThreadSynch* pw_walk; // previous waiter during list walk - if (old_h != nullptr) { // we've searched up to old_h before + if (old_h != nullptr) { // we've searched up to old_h before pw_walk = old_h; w_walk = old_h->next; } else { // no prior search, start at beginning diff --git a/absl/time/clock.cc b/absl/time/clock.cc index ecd539e5caa..7dd48f0aebd 100644 --- a/absl/time/clock.cc +++ b/absl/time/clock.cc @@ -135,7 
+135,7 @@ static inline uint64_t SeqAcquire(std::atomic *seq) { // fetch_add would be before it, not after. std::atomic_thread_fence(std::memory_order_release); - return x + 2; // original word plus 2 + return x + 2; // original word plus 2 } // Release seqlock (*seq) by writing x to it---a value previously returned by @@ -160,8 +160,8 @@ static const uint64_t kMinNSBetweenSamples = 2000 << 20; // We require that kMinNSBetweenSamples shifted by kScale // have at least a bit left over for 64-bit calculations. static_assert(((kMinNSBetweenSamples << (kScale + 1)) >> (kScale + 1)) == - kMinNSBetweenSamples, - "cannot represent kMaxBetweenSamplesNSScaled"); + kMinNSBetweenSamples, + "cannot represent kMaxBetweenSamplesNSScaled"); // data from a sample of the kernel's time value struct TimeSampleAtomic { @@ -206,8 +206,7 @@ struct ABSL_CACHELINE_ALIGNED TimeState { // A reader-writer lock protecting the static locations below. // See SeqAcquire() and SeqRelease() above. - absl::base_internal::SpinLock lock{absl::kConstInit, - base_internal::SCHEDULE_KERNEL_ONLY}; + absl::base_internal::SpinLock lock{base_internal::SCHEDULE_KERNEL_ONLY}; }; ABSL_CONST_INIT static TimeState time_state; @@ -439,8 +438,8 @@ static int64_t GetCurrentTimeNanosSlowPath() if (delta_cycles < sample.min_cycles_per_sample) { // Another thread updated the sample. This path does not take the seqlock // so that blocked readers can make progress without blocking new readers. 
- estimated_base_ns = sample.base_ns + - ((delta_cycles * sample.nsscaled_per_cycle) >> kScale); + estimated_base_ns = + sample.base_ns + ((delta_cycles * sample.nsscaled_per_cycle) >> kScale); time_state.stats_fast_slow_paths++; } else { estimated_base_ns = @@ -494,8 +493,8 @@ static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns, estimated_scaled_ns = (delta_cycles >> s) * sample->nsscaled_per_cycle; } while (estimated_scaled_ns / sample->nsscaled_per_cycle != (delta_cycles >> s)); - estimated_base_ns = sample->base_ns + - (estimated_scaled_ns >> (kScale - s)); + estimated_base_ns = + sample->base_ns + (estimated_scaled_ns >> (kScale - s)); } // Compute the assumed cycle time kMinNSBetweenSamples ns into the future @@ -522,8 +521,8 @@ static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns, diff_ns - (diff_ns / 16)); uint64_t new_nsscaled_per_cycle = SafeDivideAndScale(ns, assumed_next_sample_delta_cycles); - if (new_nsscaled_per_cycle != 0 && - diff_ns < 100 * 1000 * 1000 && -diff_ns < 100 * 1000 * 1000) { + if (new_nsscaled_per_cycle != 0 && diff_ns < 100 * 1000 * 1000 && + -diff_ns < 100 * 1000 * 1000) { // record the cycle time measurement time_state.last_sample.nsscaled_per_cycle.store( new_nsscaled_per_cycle, std::memory_order_relaxed); From 4e5beaf3b8bdfd2f80aba4e448cb1f659b3b7504 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Mon, 23 Jun 2025 12:52:24 -0700 Subject: [PATCH 088/107] Change PrecombineLengthMix to sample data from kStaticRandomData. The idea is based on Carbon hashing's SampleRandomData(). This also fixes an issue that we found when adding debug asserts to detect cases of long probe sequences in absl hash tables. In that case, the keys were length=29 strings with all the entropy in the 2nd and 3rd bytes, and we saw many collisions due to low entropy in the last byte of the hash. It seems that this was caused by the input seed having low entropy, and doing the length mixing this way solves that issue. 
Also: - Fix a bug in HashtableDebugAccess::GetNumProbes. - Move the DoubleRange test from flat_hash_set_test.cc to hash_test.cc since it's a test for hash quality. - Fix using declaration style in hash_test.cc to comply with Google C++ style guide. PiperOrigin-RevId: 774891965 Change-Id: I22f18a0288e9bfb2734f7b8c6e788b623a1db0f5 --- absl/container/flat_hash_set_test.cc | 15 ------- absl/container/internal/raw_hash_set.h | 2 +- absl/hash/hash_test.cc | 54 ++++++++++++++++++++++++-- absl/hash/internal/hash.cc | 8 ++-- absl/hash/internal/hash.h | 25 +++++------- 5 files changed, 65 insertions(+), 39 deletions(-) diff --git a/absl/container/flat_hash_set_test.cc b/absl/container/flat_hash_set_test.cc index f2c3fbd3fd8..ca069b402ad 100644 --- a/absl/container/flat_hash_set_test.cc +++ b/absl/container/flat_hash_set_test.cc @@ -397,21 +397,6 @@ TEST(FlatHashSet, IsDefaultHash) { false); } -// Test that we don't cause excessive collisions on the hash table for -// doubles in the range [-1024, 1024]. See cl/773069881 for more information. 
-TEST(FlatHashSet, DoubleRange) { - using absl::container_internal::hashtable_debug_internal:: - HashtableDebugAccess; - absl::flat_hash_set set; - for (double t = -1024.0; t < 1024.0; t += 1.0) { - set.insert(t); - } - for (double t = -1024.0; t < 1024.0; t += 1.0) { - ASSERT_LT(HashtableDebugAccess::GetNumProbes(set, t), 64) - << t; - } -} - } // namespace } // namespace container_internal ABSL_NAMESPACE_END diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 08c5d57aba0..94a3249a263 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -3713,7 +3713,7 @@ struct HashtableDebugAccess> { static size_t GetNumProbes(const Set& set, const typename Set::key_type& key) { - if (set.is_soo()) return 0; + if (set.is_small()) return 0; size_t num_probes = 0; const size_t hash = set.hash_of(key); auto seq = probe(set.common(), hash); diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 85e34c915f0..0ad92cff83d 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -58,17 +58,17 @@ namespace { +using ::absl::Hash; +using ::absl::container_internal::hashtable_debug_internal:: + HashtableDebugAccess; +using ::absl::hash_internal::SpyHashState; using ::absl::hash_test_internal::is_hashable; using ::absl::hash_test_internal::TypeErasedContainer; using ::absl::hash_test_internal::TypeErasedValue; -using ::testing::SizeIs; template using TypeErasedVector = TypeErasedContainer>; -using absl::Hash; -using absl::hash_internal::SpyHashState; - template class HashValueIntTest : public testing::Test { }; @@ -1239,4 +1239,50 @@ TEST(HashOf, DoubleSignCollision) { EXPECT_NE(absl::HashOf(-1.0), absl::HashOf(1.0)); } +// Test that we don't cause excessive collisions on the hash table for +// doubles in the range [-1024, 1024]. See cl/773069881 for more information. 
+TEST(SwisstableCollisions, DoubleRange) { +#ifdef GOOGLE_UNSUPPORTED_OS_LOONIX + // TODO(b/424834054): make this test pass on Loonix. + GTEST_SKIP() << "Test fails on Loonix."; +#endif + + absl::flat_hash_set set; + for (double t = -1024.0; t < 1024.0; t += 1.0) { + set.insert(t); + ASSERT_LT(HashtableDebugAccess::GetNumProbes(set, t), 64) + << t; + } +} + +// Test that for each pair of adjacent bytes in a string, if there's only +// entropy in those two bytes, then we don't have excessive collisions. +TEST(SwisstableCollisions, LowEntropyStrings) { + if (sizeof(size_t) < 8) { + // TODO(b/424834054): make this test pass on 32-bit platforms. We need to + // make 32-bit Mix() stronger. + GTEST_SKIP() << "Test fails on 32-bit platforms"; + } + + constexpr char kMinChar = 0; + constexpr char kMaxChar = 64; + // These sizes cover the different hashing cases. + for (size_t size : {8u, 16u, 32u, 64u}) { + for (size_t b = 0; b < size - 1; ++b) { + absl::flat_hash_set set; + std::string s(size, '\0'); + for (char c1 = kMinChar; c1 < kMaxChar; ++c1) { + for (char c2 = kMinChar; c2 < kMaxChar; ++c2) { + s[b] = c1; + s[b + 1] = c2; + set.insert(s); + ASSERT_LT(HashtableDebugAccess::GetNumProbes(set, s), + 64) + << size << " " << b; + } + } + } + } +} + } // namespace diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index 87d2061c6a2..8dcc5bd4fd8 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -100,10 +100,10 @@ uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) { ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit( const unsigned char* data, size_t len, uint64_t state) { // TODO(b/417141985): expose and use CityHash32WithSeed. - return Mix( - PrecombineLengthMix(state, len) ^ - hash_internal::CityHash32(reinterpret_cast(data), len), - kMul); + // Note: we can't use PrecombineLengthMix here because len can be up to 1024. 
+ return Mix((state + len) ^ hash_internal::CityHash32( + reinterpret_cast(data), len), + kMul); } ABSL_ATTRIBUTE_NOINLINE uint64_t diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index dc374055121..0b4f8beb0a5 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -935,21 +935,6 @@ hash_range_or_bytes(H hash_state, const T* data, size_t size) { hash_internal::WeaklyMixedInteger{size}); } -// Extremely weak mixture of length that is added to the state before combining -// the data. It is used only for small strings. -inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) { - // The length is always one byte here. We place it to 4th byte for the - // following reasons: - // 1. 4th byte is unused for very short strings 0-3 bytes. - // 2. 4th byte is duplicated for 4 bytes string. - // 3. 4th byte is in the middle and mixed well for 5-8 bytes strings. - // - // There were experiments with adding just `len` here. - // Also seems have slightly better performance overall, that gives collisions - // for small strings. - return state + (uint64_t{len} << 24); -} - inline constexpr uint64_t kMul = uint64_t{0xdcb22ca68cb134ed}; // Random data taken from the hexadecimal digits of Pi's fractional component. @@ -959,6 +944,16 @@ ABSL_CACHELINE_ALIGNED inline constexpr uint64_t kStaticRandomData[] = { 0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377, }; +// Extremely weak mixture of length that is mixed into the state before +// combining the data. It is used only for small strings. This also ensures that +// we have high entropy in all bits of the state. 
+inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) { + ABSL_ASSUME(len + sizeof(uint64_t) <= sizeof(kStaticRandomData)); + uint64_t data = absl::base_internal::UnalignedLoad64( + reinterpret_cast(&kStaticRandomData[0]) + len); + return state ^ data; +} + ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t Mix(uint64_t lhs, uint64_t rhs) { // For 32 bit platforms we are trying to use all 64 lower bits. if constexpr (sizeof(size_t) < 8) { From aed3ddd9482c8c6e1cb1ebd458fcce7c2235453c Mon Sep 17 00:00:00 2001 From: Tomas Dzetkulic Date: Mon, 23 Jun 2025 14:52:10 -0700 Subject: [PATCH 089/107] Improve NaN handling in absl::Duration arithmetic. PiperOrigin-RevId: 774936932 Change-Id: Ibde499b8c9825b4357edf71cfcb9c45eb75f4702 --- absl/time/duration.cc | 4 ++-- absl/time/duration_test.cc | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/absl/time/duration.cc b/absl/time/duration.cc index 38c4b63990c..fb7c90a2d74 100644 --- a/absl/time/duration.cc +++ b/absl/time/duration.cc @@ -469,7 +469,7 @@ Duration& Duration::operator*=(int64_t r) { Duration& Duration::operator*=(double r) { if (time_internal::IsInfiniteDuration(*this) || !IsFinite(r)) { - const bool is_neg = std::signbit(r) != (rep_hi_.Get() < 0); + const bool is_neg = std::isnan(r) || std::signbit(r) != (rep_hi_.Get() < 0); return *this = is_neg ? -InfiniteDuration() : InfiniteDuration(); } return *this = ScaleDouble(*this, r); @@ -485,7 +485,7 @@ Duration& Duration::operator/=(int64_t r) { Duration& Duration::operator/=(double r) { if (time_internal::IsInfiniteDuration(*this) || !IsValidDivisor(r)) { - const bool is_neg = std::signbit(r) != (rep_hi_.Get() < 0); + const bool is_neg = std::isnan(r) || std::signbit(r) != (rep_hi_.Get() < 0); return *this = is_neg ? 
-InfiniteDuration() : InfiniteDuration(); } return *this = ScaleDouble(*this, r); diff --git a/absl/time/duration_test.cc b/absl/time/duration_test.cc index 1e3fe670449..164ad6b0016 100644 --- a/absl/time/duration_test.cc +++ b/absl/time/duration_test.cc @@ -841,18 +841,18 @@ TEST(Duration, DivisionByZero) { TEST(Duration, NaN) { // Note that IEEE 754 does not define the behavior of a nan's sign when it is - // copied, so the code below allows for either + or - InfiniteDuration. + // copied. We return -InfiniteDuration in either case. #define TEST_NAN_HANDLING(NAME, NAN) \ do { \ const auto inf = absl::InfiniteDuration(); \ auto x = NAME(NAN); \ - EXPECT_TRUE(x == inf || x == -inf); \ + EXPECT_TRUE(x == -inf); \ auto y = NAME(42); \ y *= NAN; \ - EXPECT_TRUE(y == inf || y == -inf); \ + EXPECT_TRUE(y == -inf); \ auto z = NAME(42); \ z /= NAN; \ - EXPECT_TRUE(z == inf || z == -inf); \ + EXPECT_TRUE(z == -inf); \ } while (0) const double nan = std::numeric_limits::quiet_NaN(); From 7e1f86ff30f1eeace17f101cc3bc747c1ea53689 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 24 Jun 2025 13:32:03 -0700 Subject: [PATCH 090/107] In debug mode, assert that the probe sequence isn't excessively long. PiperOrigin-RevId: 775362001 Change-Id: I0704d69aa81020d0b6cca2e2c3e3e3690542af1a --- absl/container/internal/raw_hash_set.h | 19 +++++++++++++++++-- absl/hash/hash_test.cc | 4 ++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 94a3249a263..cbcce8d9aa4 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -3016,13 +3016,14 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; + // TODO(b/424834054): assert that Match doesn't have too many collisions. 
for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return iterator_at(seq.offset(i)); } if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); seq.next(); - ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); + AssertOnProbe(seq); } } @@ -3280,6 +3281,7 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; + // TODO(b/424834054): assert that Match doesn't have too many collisions. for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return {iterator_at(seq.offset(i)), false}; @@ -3298,7 +3300,7 @@ class raw_hash_set { return {iterator_at(index), true}; } seq.next(); - ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); + AssertOnProbe(seq); } } @@ -3380,6 +3382,19 @@ class raw_hash_set { IterateOverFullSlots(common(), sizeof(slot_type), assert_consistent); } + void AssertOnProbe([[maybe_unused]] const probe_seq& seq) { + // TODO(b/424834054): investigate and see if we can remove the deleted + // elements condition. + ABSL_SWISSTABLE_ASSERT( + (seq.index() <= 256 || seq.index() <= capacity() / 2 || + !common().growth_info().HasNoDeleted()) && + "The hash function has low entropy. If the hash function is not " + "absl::Hash, please replace it with absl::Hash. If you're already " + "using absl::Hash, please ensure you're following best practices in " + "go/absl-hash and if so, then report a bug to abseil (go/absl-bug)."); + ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); + } + // Attempts to find `key` in the table; if it isn't found, returns an iterator // where the value can be inserted into, with the control byte already set to // `key`'s H2. Returns a bool indicating whether an insertion can take place. 
diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 0ad92cff83d..0ad78b66c52 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -1258,6 +1258,10 @@ TEST(SwisstableCollisions, DoubleRange) { // Test that for each pair of adjacent bytes in a string, if there's only // entropy in those two bytes, then we don't have excessive collisions. TEST(SwisstableCollisions, LowEntropyStrings) { +#if defined(__GNUC__) && !defined(__clang__) + // TODO(b/424834054): make this test pass on GCC. + GTEST_SKIP() << "Test fails on GCC"; +#endif if (sizeof(size_t) < 8) { // TODO(b/424834054): make this test pass on 32-bit platforms. We need to // make 32-bit Mix() stronger. From 2c5af194876775b30e4cfabc7946343e7401916f Mon Sep 17 00:00:00 2001 From: Jesse Rosenstock Date: Wed, 25 Jun 2025 06:37:11 -0700 Subject: [PATCH 091/107] AnyInvocable: Fix operator==/!= comments Comments referred to `*this` instead of `f`. #Cleanup #Documentation PiperOrigin-RevId: 775664647 Change-Id: Ibdd9a1611d59078a1b368985c8929154faef527a --- absl/functional/any_invocable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/absl/functional/any_invocable.h b/absl/functional/any_invocable.h index 43ea9af8824..524345d5c0b 100644 --- a/absl/functional/any_invocable.h +++ b/absl/functional/any_invocable.h @@ -295,22 +295,22 @@ class AnyInvocable : private internal_any_invocable::Impl { // Equality operators - // Returns `true` if `*this` is empty. + // Returns `true` if `f` is empty. friend bool operator==(const AnyInvocable& f, std::nullptr_t) noexcept { return !f.HasValue(); } - // Returns `true` if `*this` is empty. + // Returns `true` if `f` is empty. friend bool operator==(std::nullptr_t, const AnyInvocable& f) noexcept { return !f.HasValue(); } - // Returns `false` if `*this` is empty. + // Returns `false` if `f` is empty. 
friend bool operator!=(const AnyInvocable& f, std::nullptr_t) noexcept { return f.HasValue(); } - // Returns `false` if `*this` is empty. + // Returns `false` if `f` is empty. friend bool operator!=(std::nullptr_t, const AnyInvocable& f) noexcept { return f.HasValue(); } From 76a480304448e311cb15988bf5a670a7e8b2d85f Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Wed, 25 Jun 2025 06:47:07 -0700 Subject: [PATCH 092/107] Rollback debug assertion that the probe sequence isn't excessively long. PiperOrigin-RevId: 775667341 Change-Id: I6e69839a0d1c007da6c4639816180967addfc96e --- absl/container/internal/raw_hash_set.h | 19 ++----------------- absl/hash/hash_test.cc | 4 ---- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index cbcce8d9aa4..94a3249a263 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -3016,14 +3016,13 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; - // TODO(b/424834054): assert that Match doesn't have too many collisions. for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return iterator_at(seq.offset(i)); } if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); seq.next(); - AssertOnProbe(seq); + ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); } } @@ -3281,7 +3280,6 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; - // TODO(b/424834054): assert that Match doesn't have too many collisions. 
for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return {iterator_at(seq.offset(i)), false}; @@ -3300,7 +3298,7 @@ class raw_hash_set { return {iterator_at(index), true}; } seq.next(); - AssertOnProbe(seq); + ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); } } @@ -3382,19 +3380,6 @@ class raw_hash_set { IterateOverFullSlots(common(), sizeof(slot_type), assert_consistent); } - void AssertOnProbe([[maybe_unused]] const probe_seq& seq) { - // TODO(b/424834054): investigate and see if we can remove the deleted - // elements condition. - ABSL_SWISSTABLE_ASSERT( - (seq.index() <= 256 || seq.index() <= capacity() / 2 || - !common().growth_info().HasNoDeleted()) && - "The hash function has low entropy. If the hash function is not " - "absl::Hash, please replace it with absl::Hash. If you're already " - "using absl::Hash, please ensure you're following best practices in " - "go/absl-hash and if so, then report a bug to abseil (go/absl-bug)."); - ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); - } - // Attempts to find `key` in the table; if it isn't found, returns an iterator // where the value can be inserted into, with the control byte already set to // `key`'s H2. Returns a bool indicating whether an insertion can take place. diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 0ad78b66c52..0ad92cff83d 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -1258,10 +1258,6 @@ TEST(SwisstableCollisions, DoubleRange) { // Test that for each pair of adjacent bytes in a string, if there's only // entropy in those two bytes, then we don't have excessive collisions. TEST(SwisstableCollisions, LowEntropyStrings) { -#if defined(__GNUC__) && !defined(__clang__) - // TODO(b/424834054): make this test pass on GCC. - GTEST_SKIP() << "Test fails on GCC"; -#endif if (sizeof(size_t) < 8) { // TODO(b/424834054): make this test pass on 32-bit platforms. 
We need to // make 32-bit Mix() stronger. From 2a2d6aad626f75a32c8b7bb775fbff73d558b8b1 Mon Sep 17 00:00:00 2001 From: Samuel Benzaquen Date: Thu, 26 Jun 2025 08:32:05 -0700 Subject: [PATCH 093/107] Update StatusOr to support lvalue reference value types. Some implementation notes: - RValue references are not supported right now. It complicates the implementation further and there doesn't seem to be a need for it. It might be done in the future. - Any kind of reference-to-reference conversion only allows those that do not require temporaries to be materialized. Eg `StatusOr` can convert to `StatusOr`, but it can't convert to `StatusOr`. - `operator*`/`value()`/`value_or()` always return the reference type, regardless of qualifications of the StatusOr. PiperOrigin-RevId: 776150069 Change-Id: I8446e7f76f6227f24e4de4b9490d20a8156ee8ab --- absl/status/internal/statusor_internal.h | 135 ++++++++-- absl/status/status_matchers_test.cc | 23 ++ absl/status/statusor.h | 73 +++--- absl/status/statusor_test.cc | 303 +++++++++++++++++++++++ 4 files changed, 476 insertions(+), 58 deletions(-) diff --git a/absl/status/internal/statusor_internal.h b/absl/status/internal/statusor_internal.h index 029fdeea385..a65318162cf 100644 --- a/absl/status/internal/statusor_internal.h +++ b/absl/status/internal/statusor_internal.h @@ -90,17 +90,34 @@ template struct IsDirectInitializationAmbiguous> : public IsConstructibleOrConvertibleFromStatusOr {}; +// Checks whether the conversion from U to T can be done without dangling +// temporaries. +// REQUIRES: T and U are references. +template +using IsReferenceConversionValid = absl::conjunction< // + std::is_reference, std::is_reference, + // The references are convertible. This checks for + // lvalue/rvalue compatibility. + std::is_convertible, + // The pointers are convertible. This checks we don't have + // a temporary. 
+ std::is_convertible*, + std::remove_reference_t*>>; + // Checks against the constraints of the direction initialization, i.e. when // `StatusOr::StatusOr(U&&)` should participate in overload resolution. template using IsDirectInitializationValid = absl::disjunction< // Short circuits if T is basically U. - std::is_same>, - absl::negation, absl::remove_cvref_t>, - std::is_same>, - std::is_same>, - IsDirectInitializationAmbiguous>>>; + std::is_same>, // + std::conditional_t< + std::is_reference_v, // + IsReferenceConversionValid, + absl::negation, absl::remove_cvref_t>, + std::is_same>, + std::is_same>, + IsDirectInitializationAmbiguous>>>>; // This trait detects whether `StatusOr::operator=(U&&)` is ambiguous, which // is equivalent to whether all the following conditions are met: @@ -140,7 +157,9 @@ using Equality = std::conditional_t>; template using IsConstructionValid = absl::conjunction< Equality>, + absl::disjunction< + std::is_reference, + type_traits_internal::IsLifetimeBoundAssignment>>, IsDirectInitializationValid, std::is_constructible, Equality>, absl::disjunction< @@ -156,8 +175,13 @@ using IsConstructionValid = absl::conjunction< template using IsAssignmentValid = absl::conjunction< Equality>, - std::is_constructible, std::is_assignable, + absl::disjunction< + std::is_reference, + type_traits_internal::IsLifetimeBoundAssignment>>, + std::conditional_t, + IsReferenceConversionValid, + absl::conjunction, + std::is_assignable>>, absl::disjunction< std::is_same>, absl::conjunction< @@ -178,6 +202,9 @@ template using IsConstructionFromStatusOrValid = absl::conjunction< absl::negation>, + // If `T` is a reference, then U must be a compatible one. + absl::disjunction>, + IsReferenceConversionValid>, Equality>, std::is_constructible, @@ -193,6 +220,16 @@ using IsStatusOrAssignmentValid = absl::conjunction< absl::negation>>>; +template +using IsValueOrValid = absl::conjunction< + // If `T` is a reference, then U must be a compatible one. 
+ absl::disjunction>, + IsReferenceConversionValid>, + Equality, + type_traits_internal::IsLifetimeBoundAssignment>>>; + class Helper { public: // Move type-agnostic error handling to the .cc. @@ -209,6 +246,26 @@ void PlacementNew(void* absl_nonnull p, Args&&... args) { new (p) T(std::forward(args)...); } +template +class Reference { + public: + constexpr explicit Reference(T ref ABSL_ATTRIBUTE_LIFETIME_BOUND) + : payload_(std::addressof(ref)) {} + + Reference(const Reference&) = default; + Reference& operator=(const Reference&) = default; + Reference& operator=(T value) { + payload_ = std::addressof(value); + return *this; + } + + operator T() const { return static_cast(*payload_); } // NOLINT + T get() const { return *this; } + + private: + std::remove_reference_t* absl_nonnull payload_; +}; + // Helper base class to hold the data and all operations. // We move all this to a base class to allow mixing with the appropriate // TraitsBase specialization. @@ -217,6 +274,14 @@ class StatusOrData { template friend class StatusOrData; + decltype(auto) MaybeMoveData() { + if constexpr (std::is_reference_v) { + return data_.get(); + } else { + return std::move(data_); + } + } + public: StatusOrData() = delete; @@ -231,7 +296,7 @@ class StatusOrData { StatusOrData(StatusOrData&& other) noexcept { if (other.ok()) { - MakeValue(std::move(other.data_)); + MakeValue(other.MaybeMoveData()); MakeStatus(); } else { MakeStatus(std::move(other.status_)); @@ -251,7 +316,7 @@ class StatusOrData { template explicit StatusOrData(StatusOrData&& other) { if (other.ok()) { - MakeValue(std::move(other.data_)); + MakeValue(other.MaybeMoveData()); MakeStatus(); } else { MakeStatus(std::move(other.status_)); @@ -264,13 +329,6 @@ class StatusOrData { MakeStatus(); } - explicit StatusOrData(const T& value) : data_(value) { - MakeStatus(); - } - explicit StatusOrData(T&& value) : data_(std::move(value)) { - MakeStatus(); - } - template ::value, int> = 0> @@ -290,7 +348,7 @@ class StatusOrData { 
StatusOrData& operator=(StatusOrData&& other) { if (this == &other) return *this; if (other.ok()) - Assign(std::move(other.data_)); + Assign(other.MaybeMoveData()); else AssignStatus(std::move(other.status_)); return *this; @@ -299,7 +357,9 @@ class StatusOrData { ~StatusOrData() { if (ok()) { status_.~Status(); - data_.~T(); + if constexpr (!std::is_trivially_destructible_v) { + data_.~T(); + } } else { status_.~Status(); } @@ -340,11 +400,13 @@ class StatusOrData { // When T is const, we need some non-const object we can cast to void* for // the placement new. dummy_ is that object. Dummy dummy_; - T data_; + std::conditional_t, Reference, T> data_; }; void Clear() { - if (ok()) data_.~T(); + if constexpr (!std::is_trivially_destructible_v) { + if (ok()) data_.~T(); + } } void EnsureOk() const { @@ -359,7 +421,8 @@ class StatusOrData { // argument. template void MakeValue(Arg&&... arg) { - internal_statusor::PlacementNew(&dummy_, std::forward(arg)...); + internal_statusor::PlacementNew(&dummy_, + std::forward(arg)...); } // Construct the status (ie. 
status_) through placement new with the passed @@ -369,6 +432,22 @@ class StatusOrData { internal_statusor::PlacementNew(&status_, std::forward(args)...); } + + template + T ValueOrImpl(U&& default_value) const& { + if (ok()) { + return data_; + } + return std::forward(default_value); + } + + template + T ValueOrImpl(U&& default_value) && { + if (ok()) { + return std::move(data_); + } + return std::forward(default_value); + } }; // Helper base classes to allow implicitly deleted constructors and assignment @@ -411,8 +490,9 @@ struct MoveCtorBase { MoveCtorBase& operator=(MoveCtorBase&&) = default; }; -template ::value&& - std::is_copy_assignable::value> +template ::value && + std::is_copy_assignable::value) || + std::is_reference_v> struct CopyAssignBase { CopyAssignBase() = default; CopyAssignBase(const CopyAssignBase&) = default; @@ -430,8 +510,9 @@ struct CopyAssignBase { CopyAssignBase& operator=(CopyAssignBase&&) = default; }; -template ::value&& - std::is_move_assignable::value> +template ::value && + std::is_move_assignable::value) || + std::is_reference_v> struct MoveAssignBase { MoveAssignBase() = default; MoveAssignBase(const MoveAssignBase&) = default; diff --git a/absl/status/status_matchers_test.cc b/absl/status/status_matchers_test.cc index b8ccaa4cb76..51a5f27670c 100644 --- a/absl/status/status_matchers_test.cc +++ b/absl/status/status_matchers_test.cc @@ -18,6 +18,7 @@ #include "absl/status/status_matchers.h" #include +#include #include "gmock/gmock.h" #include "gtest/gtest-spi.h" @@ -31,9 +32,12 @@ namespace { using ::absl_testing::IsOk; using ::absl_testing::IsOkAndHolds; using ::absl_testing::StatusIs; +using ::testing::ElementsAre; using ::testing::Eq; using ::testing::Gt; using ::testing::MatchesRegex; +using ::testing::Not; +using ::testing::Ref; TEST(StatusMatcherTest, StatusIsOk) { EXPECT_THAT(absl::OkStatus(), IsOk()); } @@ -158,4 +162,23 @@ TEST(StatusMatcherTest, StatusIsFailure) { "ungueltig"); } +TEST(StatusMatcherTest, ReferencesWork) 
{ + int i = 17; + int j = 19; + EXPECT_THAT(absl::StatusOr(i), IsOkAndHolds(17)); + EXPECT_THAT(absl::StatusOr(i), Not(IsOkAndHolds(19))); + EXPECT_THAT(absl::StatusOr(i), IsOkAndHolds(17)); + + // Reference testing works as expected. + EXPECT_THAT(absl::StatusOr(i), IsOkAndHolds(Ref(i))); + EXPECT_THAT(absl::StatusOr(i), Not(IsOkAndHolds(Ref(j)))); + + // Try a more complex one. + std::vector vec = {"A", "B", "C"}; + EXPECT_THAT(absl::StatusOr&>(vec), + IsOkAndHolds(ElementsAre("A", "B", "C"))); + EXPECT_THAT(absl::StatusOr&>(vec), + Not(IsOkAndHolds(ElementsAre("A", "X", "C")))); +} + } // namespace diff --git a/absl/status/statusor.h b/absl/status/statusor.h index 25c62887bf7..d2d16d59cb8 100644 --- a/absl/status/statusor.h +++ b/absl/status/statusor.h @@ -194,6 +194,11 @@ class StatusOr : private internal_statusor::StatusOrData, private internal_statusor::MoveCtorBase, private internal_statusor::CopyAssignBase, private internal_statusor::MoveAssignBase { +#ifndef SWIG + static_assert(!std::is_rvalue_reference_v, + "rvalue references are not yet supported."); +#endif // SWIG + template friend class StatusOr; @@ -397,7 +402,7 @@ class StatusOr : private internal_statusor::StatusOrData, typename std::enable_if< internal_statusor::IsAssignmentValid::value, int>::type = 0> - StatusOr& operator=(U&& v ABSL_ATTRIBUTE_LIFETIME_BOUND) { + StatusOr& operator=(U&& v ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this)) { this->Assign(std::forward(v)); return *this; } @@ -520,8 +525,10 @@ class StatusOr : private internal_statusor::StatusOrData, // REQUIRES: `this->ok() == true`, otherwise the behavior is undefined. // // Use `this->ok()` to verify that there is a current value. 
- const T* absl_nonnull operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND; - T* absl_nonnull operator->() ABSL_ATTRIBUTE_LIFETIME_BOUND; + std::remove_reference_t* absl_nonnull operator->() const + ABSL_ATTRIBUTE_LIFETIME_BOUND; + std::remove_reference_t* absl_nonnull operator->() + ABSL_ATTRIBUTE_LIFETIME_BOUND; // StatusOr::value_or() // @@ -536,10 +543,34 @@ class StatusOr : private internal_statusor::StatusOrData, // // Unlike with `value`, calling `std::move()` on the result of `value_or` will // still trigger a copy. - template - T value_or(U&& default_value) const&; - template - T value_or(U&& default_value) &&; + template < + typename U, + std::enable_if_t::value, + int> = 0> + T value_or(U&& default_value) const& { + return this->ValueOrImpl(std::forward(default_value)); + } + template < + typename U, + std::enable_if_t::value, + int> = 0> + T value_or(U&& default_value) && { + return std::move(*this).ValueOrImpl(std::forward(default_value)); + } + template < + typename U, + std::enable_if_t::value, + int> = 0> + T value_or(U&& default_value ABSL_ATTRIBUTE_LIFETIME_BOUND) const& { + return this->ValueOrImpl(std::forward(default_value)); + } + template < + typename U, + std::enable_if_t::value, + int> = 0> + T value_or(U&& default_value ABSL_ATTRIBUTE_LIFETIME_BOUND) && { + return std::move(*this).ValueOrImpl(std::forward(default_value)); + } // StatusOr::IgnoreError() // @@ -760,33 +791,13 @@ T&& StatusOr::operator*() && { } template -const T* absl_nonnull StatusOr::operator->() const { - this->EnsureOk(); - return &this->data_; -} - -template -T* absl_nonnull StatusOr::operator->() { - this->EnsureOk(); - return &this->data_; -} - -template -template -T StatusOr::value_or(U&& default_value) const& { - if (ok()) { - return this->data_; - } - return std::forward(default_value); +std::remove_reference_t* absl_nonnull StatusOr::operator->() const { + return std::addressof(**this); } template -template -T StatusOr::value_or(U&& default_value) && { - if (ok()) { 
- return std::move(this->data_); - } - return std::forward(default_value); +std::remove_reference_t* absl_nonnull StatusOr::operator->() { + return std::addressof(**this); } template diff --git a/absl/status/statusor_test.cc b/absl/status/statusor_test.cc index 17a33842cc9..000445315b7 100644 --- a/absl/status/statusor_test.cc +++ b/absl/status/statusor_test.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -1799,4 +1800,306 @@ TEST(StatusOr, ErrorPrinting) { EXPECT_THAT(absl::StrCat(print_me), error_matcher); } +TEST(StatusOr, SupportsReferenceTypes) { + int i = 1; + absl::StatusOr s = i; + EXPECT_EQ(&i, &*s); + *s = 10; + EXPECT_EQ(i, 10); +} + +TEST(StatusOr, ReferenceFromStatus) { + int i = 10; + absl::StatusOr s = i; + s = absl::InternalError("foo"); + EXPECT_EQ(s.status().message(), "foo"); + + absl::StatusOr s2 = absl::InternalError("foo2"); + EXPECT_EQ(s2.status().message(), "foo2"); +} + +TEST(StatusOr, SupportReferenceValueConstructor) { + int i = 1; + absl::StatusOr s = i; + absl::StatusOr cs = i; + absl::StatusOr cs2 = std::move(i); // `T&&` to `const T&` is ok. + + EXPECT_EQ(&i, &*s); + EXPECT_EQ(&i, &*cs); + + Derived d; + absl::StatusOr b = d; + EXPECT_EQ(&d, &*b); + + // We disallow constructions that cause temporaries. + EXPECT_FALSE((std::is_constructible_v, double>)); + EXPECT_FALSE( + (std::is_constructible_v, const double&>)); + EXPECT_FALSE( + (std::is_constructible_v, + std::string>)); + + // We disallow constructions with wrong reference. + EXPECT_FALSE((std::is_constructible_v, int&&>)); + EXPECT_FALSE((std::is_constructible_v, const int&>)); +} + +TEST(StatusOr, SupportReferenceConvertingConstructor) { + int i = 1; + absl::StatusOr s = i; + absl::StatusOr cs = s; + + EXPECT_EQ(&i, &*s); + EXPECT_EQ(&i, &*cs); + + // The other direction is not allowed. 
+ EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); + + Derived d; + absl::StatusOr b = absl::StatusOr(d); + EXPECT_EQ(&d, &*b); + + // The other direction is not allowed. + EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); + + // We disallow conversions that cause temporaries. + EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); + EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); + EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); + EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); + EXPECT_FALSE( + (std::is_constructible_v, + absl::StatusOr>)); + + // We disallow constructions with wrong reference. + EXPECT_FALSE((std::is_constructible_v, + absl::StatusOr>)); +} + +TEST(StatusOr, SupportReferenceValueAssignment) { + int i = 1; + absl::StatusOr s = i; + absl::StatusOr cs; + cs = i; + absl::StatusOr cs2; + cs2 = std::move(i); // `T&&` to `const T&` is ok. + + EXPECT_EQ(&i, &*s); + EXPECT_EQ(&i, &*cs); + + Derived d; + absl::StatusOr b; + b = d; + EXPECT_EQ(&d, &*b); + + // We disallow constructions that cause temporaries. + EXPECT_FALSE((std::is_assignable_v, double>)); + EXPECT_FALSE( + (std::is_assignable_v, const double&>)); + EXPECT_FALSE((std::is_assignable_v, + std::string>)); + + // We disallow constructions with wrong reference. + EXPECT_FALSE((std::is_assignable_v, int&&>)); + EXPECT_FALSE((std::is_assignable_v, const int&>)); +} + +TEST(StatusOr, SupportReferenceConvertingAssignment) { + int i = 1; + absl::StatusOr s; + s = i; + absl::StatusOr cs; + cs = s; + + EXPECT_EQ(&i, &*s); + EXPECT_EQ(&i, &*cs); + + // The other direction is not allowed. + EXPECT_FALSE( + (std::is_assignable_v, absl::StatusOr>)); + + Derived d; + absl::StatusOr b; + b = absl::StatusOr(d); + EXPECT_EQ(&d, &*b); + + // The other direction is not allowed. + EXPECT_FALSE((std::is_assignable_v, + absl::StatusOr>)); + + // We disallow conversions that cause temporaries. 
+ EXPECT_FALSE((std::is_assignable_v, + absl::StatusOr>)); + EXPECT_FALSE((std::is_assignable_v, + absl::StatusOr>)); + EXPECT_FALSE((std::is_assignable_v, + absl::StatusOr>)); + + // We disallow constructions with wrong reference. + EXPECT_FALSE( + (std::is_assignable_v, absl::StatusOr>)); +} + +TEST(StatusOr, SupportReferenceToNonReferenceConversions) { + int i = 17; + absl::StatusOr si = i; + absl::StatusOr sf = si; + EXPECT_THAT(sf, IsOkAndHolds(17.)); + + i = 20; + sf = si; + EXPECT_THAT(sf, IsOkAndHolds(20.)); + + EXPECT_THAT(absl::StatusOr(absl::StatusOr(i)), + IsOkAndHolds(20)); + EXPECT_THAT(absl::StatusOr(absl::StatusOr(i)), + IsOkAndHolds(20)); + + std::string str = "str"; + absl::StatusOr sos = absl::StatusOr(str); + EXPECT_THAT(sos, IsOkAndHolds("str")); + str = "str2"; + EXPECT_THAT(sos, IsOkAndHolds("str")); + sos = absl::StatusOr(str); + EXPECT_THAT(sos, IsOkAndHolds("str2")); + + absl::StatusOr sosv = absl::StatusOr(str); + EXPECT_THAT(sosv, IsOkAndHolds("str2")); + str = "str3"; + sosv = absl::StatusOr(str); + EXPECT_THAT(sosv, IsOkAndHolds("str3")); + + absl::string_view view = "view"; + // This way it is constructible, but not convertible because + // string_view->string is explicit + EXPECT_THAT( + absl::StatusOr(absl::StatusOr(view)), + IsOkAndHolds("view")); +#if defined(ABSL_USES_STD_STRING_VIEW) + // The assignment doesn't work with normal absl::string_view because + // std::string doesn't know about it. + sos = absl::StatusOr(view); + EXPECT_THAT(sos, IsOkAndHolds("view")); +#endif + + EXPECT_FALSE((std::is_convertible_v, + absl::StatusOr>)); +} + +TEST(StatusOr, ReferenceOperatorStarAndArrow) { + std::string str = "Foo"; + absl::StatusOr s = str; + s->assign("Bar"); + EXPECT_EQ(str, "Bar"); + + *s = "Baz"; + EXPECT_EQ(str, "Baz"); + + const absl::StatusOr cs = str; + // Even if the StatusOr is const, the reference it gives is non-const so we + // can still assign. 
+ *cs = "Finally"; + EXPECT_EQ(str, "Finally"); + + cs->clear(); + EXPECT_EQ(cs.value(), str); + EXPECT_EQ(str, ""); +} + +TEST(StatusOr, ReferenceValueOr) { + int i = 17; + absl::StatusOr si = i; + + int other = 20; + EXPECT_EQ(&i, &si.value_or(other)); + + si = absl::UnknownError(""); + EXPECT_EQ(&other, &si.value_or(other)); + + absl::StatusOr csi = i; + EXPECT_EQ(&i, &csi.value_or(1)); + + const auto value_or_call = [](auto&& sor, auto&& v) + -> decltype(std::forward(sor).value_or( + std::forward(v))) {}; + using Probe = decltype(value_or_call); + // Just to verify that Probe works as expected in the good cases. + EXPECT_TRUE((std::is_invocable_v, int&&>)); + // Causes temporary conversion. + EXPECT_FALSE( + (std::is_invocable_v, double&&>)); + // Const invalid. + EXPECT_FALSE((std::is_invocable_v, const int&>)); +} + +TEST(StatusOr, ReferenceAssignmentFromStatusOr) { + std::vector v = {1, 2, 3}; + absl::StatusOr si = v[0]; + absl::StatusOr si2 = v[1]; + + EXPECT_THAT(v, ElementsAre(1, 2, 3)); + EXPECT_THAT(si, IsOkAndHolds(1)); + EXPECT_THAT(si2, IsOkAndHolds(2)); + + // This rebinds the reference. + si = si2; + EXPECT_THAT(v, ElementsAre(1, 2, 3)); + EXPECT_THAT(si, IsOkAndHolds(2)); + EXPECT_THAT(si2, IsOkAndHolds(2)); + EXPECT_EQ(&*si, &*si2); +} + +TEST(StatusOr, ReferenceAssignFromReference) { + std::vector v = {1, 2, 3}; + absl::StatusOr si = v[0]; + + EXPECT_THAT(v, ElementsAre(1, 2, 3)); + EXPECT_THAT(si, IsOkAndHolds(1)); + + // This rebinds the reference. 
+ si = v[2]; + EXPECT_THAT(v, ElementsAre(1, 2, 3)); + EXPECT_THAT(si, IsOkAndHolds(3)); + EXPECT_EQ(&*si, &v[2]); +} + +template +void TestReferenceDeref() { + static_assert(std::is_same_v())>); + static_assert(std::is_same_v().value())>); +} + +TEST(StatusOr, ReferenceTypeIsMaintainedOnDeref) { + TestReferenceDeref&>(); + TestReferenceDeref&&>(); + TestReferenceDeref&>(); + TestReferenceDeref&&>(); + + TestReferenceDeref&>(); + TestReferenceDeref&&>(); + TestReferenceDeref&>(); + TestReferenceDeref&&>(); + + struct Struct { + int value; + }; + EXPECT_TRUE( + (std::is_same_v< + int&, decltype((std::declval>()->value))>)); + EXPECT_TRUE( + (std::is_same_v< + int&, + decltype((std::declval>()->value))>)); + EXPECT_TRUE( + (std::is_same_v< + const int&, + decltype((std::declval>()->value))>)); +} + } // namespace From c6268453ad65351cd6c5b39d69ec1a9ff39801b3 Mon Sep 17 00:00:00 2001 From: Samuel Benzaquen Date: Fri, 27 Jun 2025 08:10:12 -0700 Subject: [PATCH 094/107] Fix LIFETIME annotations for op*/op->/value operators for reference types. The lifetime of the returned references is not tied to the lifetime of the StatusOr. 
PiperOrigin-RevId: 776588991 Change-Id: If78e63a3f9120820ff888bc43f7180f3e4c3677e --- absl/status/internal/statusor_internal.h | 74 ++++++++++++++++++++- absl/status/statusor.h | 82 ++---------------------- absl/status/statusor_test.cc | 19 ++++++ 3 files changed, 98 insertions(+), 77 deletions(-) diff --git a/absl/status/internal/statusor_internal.h b/absl/status/internal/statusor_internal.h index a65318162cf..b6641041eac 100644 --- a/absl/status/internal/statusor_internal.h +++ b/absl/status/internal/statusor_internal.h @@ -450,6 +450,78 @@ class StatusOrData { } }; +[[noreturn]] void ThrowBadStatusOrAccess(absl::Status status); + +template +struct OperatorBase { + auto& self() const { return static_cast&>(*this); } + auto& self() { return static_cast&>(*this); } + + const T& operator*() const& ABSL_ATTRIBUTE_LIFETIME_BOUND { + self().EnsureOk(); + return self().data_; + } + T& operator*() & ABSL_ATTRIBUTE_LIFETIME_BOUND { + self().EnsureOk(); + return self().data_; + } + const T&& operator*() const&& ABSL_ATTRIBUTE_LIFETIME_BOUND { + self().EnsureOk(); + return std::move(self().data_); + } + T&& operator*() && ABSL_ATTRIBUTE_LIFETIME_BOUND { + self().EnsureOk(); + return std::move(self().data_); + } + + const T& value() const& ABSL_ATTRIBUTE_LIFETIME_BOUND { + if (!self().ok()) internal_statusor::ThrowBadStatusOrAccess(self().status_); + return self().data_; + } + T& value() & ABSL_ATTRIBUTE_LIFETIME_BOUND { + if (!self().ok()) internal_statusor::ThrowBadStatusOrAccess(self().status_); + return self().data_; + } + const T&& value() const&& ABSL_ATTRIBUTE_LIFETIME_BOUND { + if (!self().ok()) { + internal_statusor::ThrowBadStatusOrAccess(std::move(self().status_)); + } + return std::move(self().data_); + } + T&& value() && ABSL_ATTRIBUTE_LIFETIME_BOUND { + if (!self().ok()) { + internal_statusor::ThrowBadStatusOrAccess(std::move(self().status_)); + } + return std::move(self().data_); + } + + const T* absl_nonnull operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { 
+ return std::addressof(**this); + } + T* absl_nonnull operator->() ABSL_ATTRIBUTE_LIFETIME_BOUND { + return std::addressof(**this); + } +}; + +template +struct OperatorBase { + auto& self() const { return static_cast&>(*this); } + + T& operator*() const { + self().EnsureOk(); + return self().data_; + } + + T& value() const { + if (!self().ok()) internal_statusor::ThrowBadStatusOrAccess(self().status_); + return self().data_; + } + + T* absl_nonnull operator->() const { + return std::addressof(**this); + } +}; + // Helper base classes to allow implicitly deleted constructors and assignment // operators in `StatusOr`. For example, `CopyCtorBase` will explicitly delete // the copy constructor when T is not copy constructible and `StatusOr` will @@ -530,8 +602,6 @@ struct MoveAssignBase { MoveAssignBase& operator=(MoveAssignBase&&) = delete; }; -[[noreturn]] void ThrowBadStatusOrAccess(absl::Status status); - // Used to introduce jitter into the output of printing functions for // `StatusOr` (i.e. `AbslStringify` and `operator<<`). 
class StringifyRandom { diff --git a/absl/status/statusor.h b/absl/status/statusor.h index d2d16d59cb8..56309af32e0 100644 --- a/absl/status/statusor.h +++ b/absl/status/statusor.h @@ -189,7 +189,8 @@ class ABSL_MUST_USE_RESULT StatusOr; // return Foo(arg); // } template -class StatusOr : private internal_statusor::StatusOrData, +class StatusOr : private internal_statusor::OperatorBase, + private internal_statusor::StatusOrData, private internal_statusor::CopyCtorBase, private internal_statusor::MoveCtorBase, private internal_statusor::CopyAssignBase, @@ -202,6 +203,8 @@ class StatusOr : private internal_statusor::StatusOrData, template friend class StatusOr; + friend internal_statusor::OperatorBase; + typedef internal_statusor::StatusOrData Base; public: @@ -498,10 +501,7 @@ class StatusOr : private internal_statusor::StatusOrData, // // The `std::move` on statusor instead of on the whole expression enables // warnings about possible uses of the statusor object after the move. - const T& value() const& ABSL_ATTRIBUTE_LIFETIME_BOUND; - T& value() & ABSL_ATTRIBUTE_LIFETIME_BOUND; - const T&& value() const&& ABSL_ATTRIBUTE_LIFETIME_BOUND; - T&& value() && ABSL_ATTRIBUTE_LIFETIME_BOUND; + using StatusOr::OperatorBase::value; // StatusOr:: operator*() // @@ -513,10 +513,7 @@ class StatusOr : private internal_statusor::StatusOrData, // `absl::StatusOr`. Alternatively, see the `value()` member function for a // similar API that guarantees crashing or throwing an exception if there is // no current value. - const T& operator*() const& ABSL_ATTRIBUTE_LIFETIME_BOUND; - T& operator*() & ABSL_ATTRIBUTE_LIFETIME_BOUND; - const T&& operator*() const&& ABSL_ATTRIBUTE_LIFETIME_BOUND; - T&& operator*() && ABSL_ATTRIBUTE_LIFETIME_BOUND; + using StatusOr::OperatorBase::operator*; // StatusOr::operator->() // @@ -525,10 +522,7 @@ class StatusOr : private internal_statusor::StatusOrData, // REQUIRES: `this->ok() == true`, otherwise the behavior is undefined. 
// // Use `this->ok()` to verify that there is a current value. - std::remove_reference_t* absl_nonnull operator->() const - ABSL_ATTRIBUTE_LIFETIME_BOUND; - std::remove_reference_t* absl_nonnull operator->() - ABSL_ATTRIBUTE_LIFETIME_BOUND; + using StatusOr::OperatorBase::operator->; // StatusOr::value_or() // @@ -738,68 +732,6 @@ Status StatusOr::status() && { return ok() ? OkStatus() : std::move(this->status_); } -template -const T& StatusOr::value() const& { - if (!this->ok()) internal_statusor::ThrowBadStatusOrAccess(this->status_); - return this->data_; -} - -template -T& StatusOr::value() & { - if (!this->ok()) internal_statusor::ThrowBadStatusOrAccess(this->status_); - return this->data_; -} - -template -const T&& StatusOr::value() const&& { - if (!this->ok()) { - internal_statusor::ThrowBadStatusOrAccess(std::move(this->status_)); - } - return std::move(this->data_); -} - -template -T&& StatusOr::value() && { - if (!this->ok()) { - internal_statusor::ThrowBadStatusOrAccess(std::move(this->status_)); - } - return std::move(this->data_); -} - -template -const T& StatusOr::operator*() const& { - this->EnsureOk(); - return this->data_; -} - -template -T& StatusOr::operator*() & { - this->EnsureOk(); - return this->data_; -} - -template -const T&& StatusOr::operator*() const&& { - this->EnsureOk(); - return std::move(this->data_); -} - -template -T&& StatusOr::operator*() && { - this->EnsureOk(); - return std::move(this->data_); -} - -template -std::remove_reference_t* absl_nonnull StatusOr::operator->() const { - return std::addressof(**this); -} - -template -std::remove_reference_t* absl_nonnull StatusOr::operator->() { - return std::addressof(**this); -} - template void StatusOr::IgnoreError() const { // no-op diff --git a/absl/status/statusor_test.cc b/absl/status/statusor_test.cc index 000445315b7..26d4235808c 100644 --- a/absl/status/statusor_test.cc +++ b/absl/status/statusor_test.cc @@ -2069,6 +2069,25 @@ TEST(StatusOr, ReferenceAssignFromReference) { 
EXPECT_EQ(&*si, &v[2]); } +TEST(StatusOr, ReferenceIsNotLifetimeBoundForStarValue) { + int i = 0; + + // op*/value should not be LIFETIME_BOUND because the ref is not limited to + // the lifetime of the StatusOr. + int& r = *absl::StatusOr(i); + EXPECT_EQ(&r, &i); + int& r2 = absl::StatusOr(i).value(); + EXPECT_EQ(&r2, &i); + + struct S { + int i; + }; + S s; + // op-> should also not be LIFETIME_BOUND for refs. + int& r3 = absl::StatusOr(s)->i; + EXPECT_EQ(&r3, &s.i); +} + template void TestReferenceDeref() { static_assert(std::is_same_v())>); From bca1ec088c95808fa60c326eaa4c9c6a170ff673 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Fri, 27 Jun 2025 09:11:53 -0700 Subject: [PATCH 095/107] Change the value of kMul to have higher entropy and prevent collisions when keys are aligned integers or pointers. We found that there were cases of high collisions specifically when keys were aligned to e.g. 8K (lowest 13 bits 0). PiperOrigin-RevId: 776608240 Change-Id: I92100c885f2dcee95a68f91c9dac9abd23fad52b --- absl/hash/hash_test.cc | 15 +++++++++++++++ absl/hash/internal/hash.h | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index 0ad92cff83d..ab094a16269 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -1285,4 +1285,19 @@ TEST(SwisstableCollisions, LowEntropyStrings) { } } +// Test that we don't have excessive collisions when keys are consecutive +// integers rotated by N bits. 
+TEST(SwisstableCollisions, LowEntropyInts) { + constexpr int kSizeTBits = sizeof(size_t) * 8; + for (int bit = 0; bit < kSizeTBits; ++bit) { + absl::flat_hash_set set; + for (size_t i = 0; i < 128 * 1024; ++i) { + size_t v = absl::rotl(i, bit); + set.insert(v); + ASSERT_LT(HashtableDebugAccess::GetNumProbes(set, v), 32) + << bit << " " << i; + } + } +} + } // namespace diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 0b4f8beb0a5..8fd9e97b5d0 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -935,7 +935,7 @@ hash_range_or_bytes(H hash_state, const T* data, size_t size) { hash_internal::WeaklyMixedInteger{size}); } - inline constexpr uint64_t kMul = uint64_t{0xdcb22ca68cb134ed}; +inline constexpr uint64_t kMul = uint64_t{0x79d5f9e0de1e8cf5}; // Random data taken from the hexadecimal digits of Pi's fractional component. // https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number From d4e6b8cdb9f4a7855a2bf223d98f9998f5065672 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Fri, 27 Jun 2025 14:43:23 -0700 Subject: [PATCH 096/107] Mark hash_test as large so that the timeout is increased. 
PiperOrigin-RevId: 776724030 Change-Id: Ib8c69136f73c260fd2a823bbfcae262c70cd94e4 --- absl/hash/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel index 04882271605..e1fa1937b83 100644 --- a/absl/hash/BUILD.bazel +++ b/absl/hash/BUILD.bazel @@ -76,6 +76,7 @@ cc_library( cc_test( name = "hash_test", + size = "large", srcs = [ "hash_test.cc", "internal/hash_test.h", From 21db85548de7234f5a3221a88ef50718508091ba Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 30 Jun 2025 04:01:12 -0700 Subject: [PATCH 097/107] Automated Code Change PiperOrigin-RevId: 777502432 Change-Id: I7dab40452e7f879f08468bf5b0609f32d2591037 --- absl/base/spinlock_test_common.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/absl/base/spinlock_test_common.cc b/absl/base/spinlock_test_common.cc index dd8b3cb5e49..2af63b283ca 100644 --- a/absl/base/spinlock_test_common.cc +++ b/absl/base/spinlock_test_common.cc @@ -221,7 +221,7 @@ TEST(SpinLockWithThreads, StackSpinLock) { } TEST(SpinLockWithThreads, StackCooperativeSpinLock) { - SpinLock spinlock(base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL); + SpinLock spinlock; ThreadedTest(&spinlock); } @@ -272,8 +272,7 @@ TEST(SpinLockWithThreads, DoesNotDeadlock) { } }; - SpinLock stack_cooperative_spinlock( - base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL); + SpinLock stack_cooperative_spinlock; SpinLock stack_noncooperative_spinlock(base_internal::SCHEDULE_KERNEL_ONLY); Helper::DeadlockTest(&stack_cooperative_spinlock, base_internal::NumCPUs() * 2); @@ -289,7 +288,7 @@ TEST(SpinLockTest, IsCooperative) { SpinLock default_constructor; EXPECT_TRUE(SpinLockTest::IsCooperative(default_constructor)); - SpinLock cooperative(base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL); + SpinLock cooperative; EXPECT_TRUE(SpinLockTest::IsCooperative(cooperative)); SpinLock kernel_only(base_internal::SCHEDULE_KERNEL_ONLY); From d20bae7f9871846821c2b2114a94c1fcb6e1abf5 Mon Sep 17 00:00:00 
2001 From: Evan Brown Date: Mon, 30 Jun 2025 10:51:14 -0700 Subject: [PATCH 098/107] Refactor: define CombineRawImpl for repeated `Mix(state ^ value, kMul)` operations. PiperOrigin-RevId: 777635780 Change-Id: I7b7305fd27ae552f8b96cc93adb3593385381089 --- absl/hash/internal/hash.cc | 6 +++--- absl/hash/internal/hash.h | 11 ++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc index 8dcc5bd4fd8..a693d5c1f73 100644 --- a/absl/hash/internal/hash.cc +++ b/absl/hash/internal/hash.cc @@ -101,9 +101,9 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit( const unsigned char* data, size_t len, uint64_t state) { // TODO(b/417141985): expose and use CityHash32WithSeed. // Note: we can't use PrecombineLengthMix here because len can be up to 1024. - return Mix((state + len) ^ hash_internal::CityHash32( - reinterpret_cast(data), len), - kMul); + return CombineRawImpl( + state + len, + hash_internal::CityHash32(reinterpret_cast(data), len)); } ABSL_ATTRIBUTE_NOINLINE uint64_t diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 8fd9e97b5d0..21728b0fb6b 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -1034,6 +1034,11 @@ inline uint32_t Read1To3(const unsigned char* p, size_t len) { return mem0 | mem1; } +ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineRawImpl(uint64_t state, + uint64_t value) { + return Mix(state ^ value, kMul); +} + // Slow dispatch path for calls to CombineContiguousImpl with a size argument // larger than inlined size. Has the same effect as calling // CombineContiguousImpl() repeatedly with the chunk stride size. @@ -1055,7 +1060,7 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineSmallContiguousImpl( // Empty string must modify the state. 
v = 0x57; } - return Mix(state ^ v, kMul); + return CombineRawImpl(state, v); } ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineContiguousImpl9to16( @@ -1263,7 +1268,7 @@ class ABSL_DLL MixingHashState : public HashStateBase { template ::value, int> = 0> static size_t hash_with_seed(T value, size_t seed) { return static_cast( - Mix(seed ^ static_cast>(value), kMul)); + CombineRawImpl(seed, static_cast>(value))); } template ::value, int> = 0> @@ -1301,7 +1306,7 @@ class ABSL_DLL MixingHashState : public HashStateBase { // optimize Read1To3 and Read4To8 differently for the string case. static MixingHashState combine_raw(MixingHashState hash_state, uint64_t value) { - return MixingHashState(Mix(hash_state.state_ ^ value, kMul)); + return MixingHashState(CombineRawImpl(hash_state.state_, value)); } static MixingHashState combine_weakly_mixed_integer( From 43a3a66d9d6b0a98bb77079d6f8a41f4247797e6 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 1 Jul 2025 08:23:44 -0700 Subject: [PATCH 099/107] Add a new test for hash collisions for short strings when PrecombineLengthMix has low quality. PiperOrigin-RevId: 778053000 Change-Id: I0be0fdb102b7ee345de81205cefe26698fdc61e6 --- absl/hash/hash_test.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index ab094a16269..c10000503ba 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -1239,6 +1239,30 @@ TEST(HashOf, DoubleSignCollision) { EXPECT_NE(absl::HashOf(-1.0), absl::HashOf(1.0)); } +// Test for collisions in short strings if PrecombineLengthMix is low quality. 
+TEST(PrecombineLengthMix, ShortStringCollision) { + std::string s1 = "00"; + std::string s2 = "000"; + constexpr char kMinChar = 0; + constexpr char kMaxChar = 32; + for (s1[0] = kMinChar; s1[0] < kMaxChar; ++s1[0]) { + for (s1[1] = kMinChar; s1[1] < kMaxChar; ++s1[1]) { + for (s2[0] = kMinChar; s2[0] < kMaxChar; ++s2[0]) { + for (s2[1] = kMinChar; s2[1] < kMaxChar; ++s2[1]) { + for (s2[2] = kMinChar; s2[2] < kMaxChar; ++s2[2]) { + ASSERT_NE(absl::HashOf(s1), absl::HashOf(s2)) + << "s1[0]: " << static_cast(s1[0]) + << "; s1[1]: " << static_cast(s1[1]) + << "; s2[0]: " << static_cast(s2[0]) + << "; s2[1]: " << static_cast(s2[1]) + << "; s2[2]: " << static_cast(s2[2]); + } + } + } + } + } +} + // Test that we don't cause excessive collisions on the hash table for // doubles in the range [-1024, 1024]. See cl/773069881 for more information. TEST(SwisstableCollisions, DoubleRange) { From 4471fbf436b0ec5fae81dae2581ae70e975ee457 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 1 Jul 2025 13:36:53 -0700 Subject: [PATCH 100/107] Roll forward: In debug mode, assert that the probe sequence isn't excessively long. This time, we assert only for non-default hash functions. PiperOrigin-RevId: 778171245 Change-Id: I53a93b20d57220252ffe2fd4f6c5f45a3fda74e7 --- absl/container/internal/raw_hash_set.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 94a3249a263..0cd8ad2575c 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -3016,13 +3016,14 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; + // TODO(b/424834054): assert that Match doesn't have too many collisions. 
for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return iterator_at(seq.offset(i)); } if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); seq.next(); - ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); + AssertOnProbe(seq); } } @@ -3280,6 +3281,7 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; + // TODO(b/424834054): assert that Match doesn't have too many collisions. for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return {iterator_at(seq.offset(i)), false}; @@ -3298,7 +3300,7 @@ class raw_hash_set { return {iterator_at(index), true}; } seq.next(); - ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); + AssertOnProbe(seq); } } @@ -3380,6 +3382,20 @@ class raw_hash_set { IterateOverFullSlots(common(), sizeof(slot_type), assert_consistent); } + void AssertOnProbe([[maybe_unused]] const probe_seq& seq) { + ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); + // We only assert that the hash function has good quality for non-default + // hash functions. + if constexpr (std::is_same_v>) return; + // TODO(b/424834054): investigate and see if we can remove the deleted + // elements condition. + ABSL_SWISSTABLE_ASSERT( + (seq.index() <= 256 || seq.index() <= capacity() / 2 || + !common().growth_info().HasNoDeleted()) && + "The hash function has low entropy and is non-default. Please replace " + "it with absl::Hash."); + } + // Attempts to find `key` in the table; if it isn't found, returns an iterator // where the value can be inserted into, with the control byte already set to // `key`'s H2. Returns a bool indicating whether an insertion can take place. 
From ddbb5170bb81762bca5493b1dda9160d76b5887b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Kosi=C5=84ski?= Date: Tue, 1 Jul 2025 18:31:16 -0700 Subject: [PATCH 101/107] Consistently use [[maybe_unused]] in raw_hash_set.h for better compiler warning compatibility. PiperOrigin-RevId: 778274640 Change-Id: I8075188f374b6a65743c1e7ffced5b0338c18031 --- absl/container/internal/raw_hash_set.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 0cd8ad2575c..5f5a1d9f61b 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -2388,7 +2388,7 @@ class raw_hash_set { size_t capacity() const { const size_t cap = common().capacity(); // Compiler complains when using functions in ASSUME so use local variable. - ABSL_ATTRIBUTE_UNUSED static constexpr size_t kDefaultCapacity = + [[maybe_unused]] static constexpr size_t kDefaultCapacity = DefaultCapacity(); ABSL_ASSUME(cap >= kDefaultCapacity); return cap; @@ -3155,8 +3155,7 @@ class raw_hash_set { std::move(tmp)); } - void annotate_for_bug_detection_on_move( - ABSL_ATTRIBUTE_UNUSED raw_hash_set& that) { + void annotate_for_bug_detection_on_move([[maybe_unused]] raw_hash_set& that) { // We only enable moved-from validation when generations are enabled (rather // than using NDEBUG) to avoid issues in which NDEBUG is enabled in some // translation units but not in others. @@ -3307,7 +3306,7 @@ class raw_hash_set { protected: // Asserts for correctness that we run on find/find_or_prepare_insert. 
template - void AssertOnFind(ABSL_ATTRIBUTE_UNUSED const K& key) { + void AssertOnFind([[maybe_unused]] const K& key) { AssertHashEqConsistent(key); AssertNotDebugCapacity(); } @@ -3366,7 +3365,7 @@ class raw_hash_set { const bool is_key_equal = equal_to(key, to_slot(slot)); if (!is_key_equal) return; - ABSL_ATTRIBUTE_UNUSED const bool is_hash_equal = + [[maybe_unused]] const bool is_hash_equal = hash_of_arg == hash_of(to_slot(slot)); assert((!is_key_equal || is_hash_equal) && "eq(k1, k2) must imply that hash(k1) == hash(k2). " @@ -3661,7 +3660,7 @@ struct HashtableFreeFunctionsAccess { c->erase_meta_only(it); return 1; } - ABSL_ATTRIBUTE_UNUSED const size_t original_size_for_assert = c->size(); + [[maybe_unused]] const size_t original_size_for_assert = c->size(); size_t num_deleted = 0; using SlotType = typename Set::slot_type; IterateOverFullSlots( From aad15c0e5b202d8223d075540ab157c5eece39c3 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Wed, 2 Jul 2025 09:39:21 -0700 Subject: [PATCH 102/107] Assume that control bytes don't alias CommonFields. PiperOrigin-RevId: 778538667 Change-Id: Ibcd7433923f88726cc83afd9bf09604bf3979eeb --- absl/container/internal/raw_hash_set.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 5f5a1d9f61b..3e764a5773a 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -964,7 +964,14 @@ class CommonFields : public CommonFieldsGenerationInfo { ctrl_t* control() const { ABSL_SWISSTABLE_ASSERT(capacity() > 0); - ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap_or_soo_.control().get()); + // Assume that the control bytes don't alias `this`. 
+ ctrl_t* ctrl = heap_or_soo_.control().get(); + [[maybe_unused]] size_t num_control_bytes = NumControlBytes(capacity()); + ABSL_ASSUME(reinterpret_cast(ctrl + num_control_bytes) <= + reinterpret_cast(this) || + reinterpret_cast(this + 1) <= + reinterpret_cast(ctrl)); + ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(ctrl); } void set_control(ctrl_t* c) { heap_or_soo_.control().set(c); } From 2ecc1dd00cb4a0b5cd2ae7ee06f3f332f472e336 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 3 Jul 2025 00:48:37 -0700 Subject: [PATCH 103/107] Allow a `std::move` of `delimiter_` to happen in `ByString::ByString(ByString&&)`. Right now the move ctor is making a copy because the source object is `const`. PiperOrigin-RevId: 778791853 Change-Id: Ia1ef1a0c525aebc03509547b81c4f6e005971fa6 --- absl/strings/str_split.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index 761568a31b1..cf53ccf1f1a 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -127,7 +127,7 @@ class ByString { absl::string_view Find(absl::string_view text, size_t pos) const; private: - const std::string delimiter_; + std::string delimiter_; }; // ByAsciiWhitespace From a09b217e30fb09f36bcbd5436d817eb6d72651ee Mon Sep 17 00:00:00 2001 From: Vitaly Goldshteyn Date: Mon, 7 Jul 2025 05:49:35 -0700 Subject: [PATCH 104/107] Roll back one more time: In debug mode, assert that the probe sequence isn't excessively long. 
PiperOrigin-RevId: 780073771 Change-Id: Id81d47b7a7d8dd3badf0ee1d8573bc41b892b362 --- absl/container/internal/raw_hash_set.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 3e764a5773a..51065c13c7e 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -3023,14 +3023,13 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; - // TODO(b/424834054): assert that Match doesn't have too many collisions. for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return iterator_at(seq.offset(i)); } if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); seq.next(); - AssertOnProbe(seq); + ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); } } @@ -3287,7 +3286,6 @@ class raw_hash_set { absl::PrefetchToLocalCache(slot_array() + seq.offset()); #endif Group g{ctrl + seq.offset()}; - // TODO(b/424834054): assert that Match doesn't have too many collisions. for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) return {iterator_at(seq.offset(i)), false}; @@ -3306,7 +3304,7 @@ class raw_hash_set { return {iterator_at(index), true}; } seq.next(); - AssertOnProbe(seq); + ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); } } @@ -3388,20 +3386,6 @@ class raw_hash_set { IterateOverFullSlots(common(), sizeof(slot_type), assert_consistent); } - void AssertOnProbe([[maybe_unused]] const probe_seq& seq) { - ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); - // We only assert that the hash function has good quality for non-default - // hash functions. - if constexpr (std::is_same_v>) return; - // TODO(b/424834054): investigate and see if we can remove the deleted - // elements condition. 
- ABSL_SWISSTABLE_ASSERT( - (seq.index() <= 256 || seq.index() <= capacity() / 2 || - !common().growth_info().HasNoDeleted()) && - "The hash function has low entropy and is non-default. Please replace " - "it with absl::Hash."); - } - // Attempts to find `key` in the table; if it isn't found, returns an iterator // where the value can be inserted into, with the control byte already set to // `key`'s H2. Returns a bool indicating whether an insertion can take place. From ea50280aeb0d595acfa3e3957e9ffbb9e59fd22d Mon Sep 17 00:00:00 2001 From: Chris Kennelly Date: Mon, 7 Jul 2025 15:27:31 -0700 Subject: [PATCH 105/107] Include deallocated caller-provided size in delete hooks. As allocations may come from unsized free or delete as well, we indicate these via std::nullopt. PiperOrigin-RevId: 780282346 Change-Id: I123b5a0541bdf26e44686de79821dbb1326c4cf6 --- absl/base/internal/low_level_alloc.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/absl/base/internal/low_level_alloc.cc b/absl/base/internal/low_level_alloc.cc index 158b60982f1..2dd36049640 100644 --- a/absl/base/internal/low_level_alloc.cc +++ b/absl/base/internal/low_level_alloc.cc @@ -19,6 +19,7 @@ #include "absl/base/internal/low_level_alloc.h" +#include #include #include "absl/base/call_once.h" From cf2b48cf458c18c9c5ff726ff27de273a4740d8b Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Tue, 8 Jul 2025 09:25:22 -0700 Subject: [PATCH 106/107] Perform stronger mixing on 32-bit platforms and enable the LowEntropyStrings test. Also avoid the conditional use of intrinsic 128-bit integers in Mix(), which is no longer needed. There is no generated code difference for x86-64 clang. 
PiperOrigin-RevId: 780592497 Change-Id: I2e0b05a78521f42e3eebcb7e99e8387c34fe7e33 --- absl/hash/hash_test.cc | 13 +------------ absl/hash/internal/hash.h | 21 +++------------------ 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc index c10000503ba..2a95ee57777 100644 --- a/absl/hash/hash_test.cc +++ b/absl/hash/hash_test.cc @@ -1266,11 +1266,6 @@ TEST(PrecombineLengthMix, ShortStringCollision) { // Test that we don't cause excessive collisions on the hash table for // doubles in the range [-1024, 1024]. See cl/773069881 for more information. TEST(SwisstableCollisions, DoubleRange) { -#ifdef GOOGLE_UNSUPPORTED_OS_LOONIX - // TODO(b/424834054): make this test pass on Loonix. - GTEST_SKIP() << "Test fails on Loonix."; -#endif - absl::flat_hash_set set; for (double t = -1024.0; t < 1024.0; t += 1.0) { set.insert(t); @@ -1282,12 +1277,6 @@ TEST(SwisstableCollisions, DoubleRange) { // Test that for each pair of adjacent bytes in a string, if there's only // entropy in those two bytes, then we don't have excessive collisions. TEST(SwisstableCollisions, LowEntropyStrings) { - if (sizeof(size_t) < 8) { - // TODO(b/424834054): make this test pass on 32-bit platforms. We need to - // make 32-bit Mix() stronger. - GTEST_SKIP() << "Test fails on 32-bit platforms"; - } - constexpr char kMinChar = 0; constexpr char kMaxChar = 64; // These sizes cover the different hashing cases. 
@@ -1302,7 +1291,7 @@ TEST(SwisstableCollisions, LowEntropyStrings) { set.insert(s); ASSERT_LT(HashtableDebugAccess::GetNumProbes(set, s), 64) - << size << " " << b; + << "size: " << size << "; bit: " << b; } } } diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h index 21728b0fb6b..0eeeb4e18d1 100644 --- a/absl/hash/internal/hash.h +++ b/absl/hash/internal/hash.h @@ -955,24 +955,9 @@ inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) { } ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t Mix(uint64_t lhs, uint64_t rhs) { - // For 32 bit platforms we are trying to use all 64 lower bits. - if constexpr (sizeof(size_t) < 8) { - uint64_t m = lhs * rhs; - return m ^ absl::byteswap(m); - } - // absl::uint128 is not an alias or a thin wrapper around the intrinsic. - // We use the intrinsic when available to improve performance. - // TODO(b/399425325): Try to remove MulType since compiler seem to generate - // the same code with just absl::uint128. - // See https://gcc.godbolt.org/z/s3hGarraG for details. -#ifdef ABSL_HAVE_INTRINSIC_INT128 - using MulType = __uint128_t; -#else // ABSL_HAVE_INTRINSIC_INT128 - using MulType = absl::uint128; -#endif // ABSL_HAVE_INTRINSIC_INT128 - // Though the 128-bit product on AArch64 needs two instructions, it is - // still a good balance between speed and hash quality. - MulType m = lhs; + // Though the 128-bit product needs multiple instructions on non-x86-64 + // platforms, it is still a good balance between speed and hash quality. + absl::uint128 m = lhs; m *= rhs; return Uint128High64(m) ^ Uint128Low64(m); } From 733069ae183d6ce13656854b5fd7f9768f4c1259 Mon Sep 17 00:00:00 2001 From: Evan Brown Date: Wed, 9 Jul 2025 08:11:41 -0700 Subject: [PATCH 107/107] Refactor: move find_first_non_full into raw_hash_set.cc. 
PiperOrigin-RevId: 781058935 Change-Id: Ia3e4284b0b6064517b653907159285e3a642aa4b --- absl/container/internal/raw_hash_set.cc | 17 ++++++++++++----- absl/container/internal/raw_hash_set.h | 9 --------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc index 640c5a5be46..bd03ac17476 100644 --- a/absl/container/internal/raw_hash_set.cc +++ b/absl/container/internal/raw_hash_set.cc @@ -170,6 +170,18 @@ FindInfo find_first_non_full_from_h1(const ctrl_t* ctrl, size_t h1, } } +// Probes an array of control bits using a probe sequence derived from `hash`, +// and returns the offset corresponding to the first deleted or empty slot. +// +// Behavior when the entire table is full is undefined. +// +// NOTE: this function must work with tables having both empty and deleted +// slots in the same group. Such tables appear during `erase()`. +FindInfo find_first_non_full(const CommonFields& common, size_t hash) { + return find_first_non_full_from_h1(common.control(), H1(hash), + common.capacity()); +} + // Whether a table fits in half a group. A half-group table fits entirely into a // probing group, i.e., has a capacity < `Group::kWidth`. 
// @@ -246,11 +258,6 @@ void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) { ctrl[capacity] = ctrl_t::kSentinel; } -FindInfo find_first_non_full(const CommonFields& common, size_t hash) { - return find_first_non_full_from_h1(common.control(), H1(hash), - common.capacity()); -} - void IterateOverFullSlots(const CommonFields& c, size_t slot_size, absl::FunctionRef cb) { IterateOverFullSlotsImpl(c, slot_size, cb); diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index 51065c13c7e..205450a0f69 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -1429,15 +1429,6 @@ inline probe_seq probe(const CommonFields& common, size_t hash) { return probe(common.capacity(), hash); } -// Probes an array of control bits using a probe sequence derived from `hash`, -// and returns the offset corresponding to the first deleted or empty slot. -// -// Behavior when the entire table is full is undefined. -// -// NOTE: this function must work with tables having both empty and deleted -// slots in the same group. Such tables appear during `erase()`. -FindInfo find_first_non_full(const CommonFields& common, size_t hash); - constexpr size_t kProbedElementIndexSentinel = ~size_t{}; // Implementation detail of transfer_unprobed_elements_to_next_capacity_fn.