Skip to content

Conversation

philnik777
Copy link
Contributor

@philnik777 philnik777 commented Aug 28, 2025

Zen 2:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                            old             new
--------------------------------------------------------------------------------------------------------
std::map<int, int>::insert_or_assign(key, value) (already present)/0             1.62 ns         1.53 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/32            5.78 ns         5.99 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/1024          21.5 ns         15.4 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/8192          26.2 ns         20.5 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/0                   22.5 ns         21.1 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/32                  42.9 ns         28.4 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/1024                 118 ns         92.0 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/8192                 227 ns          173 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/0     13.2 ns         18.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/32    65.6 ns         39.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024   127 ns         64.4 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192   134 ns         71.4 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/0           45.6 ns         37.3 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/32           142 ns         93.3 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024         288 ns          147 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192         368 ns          182 ns

Apple M4:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                              old           new
--------------------------------------------------------------------------------------------------------
std::map<int, int>::insert_or_assign(key, value) (already present)/0              0.784 ns      0.740 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/32              2.52 ns       1.77 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/1024            8.72 ns       4.06 ns
std::map<int, int>::insert_or_assign(key, value) (already present)/8192            10.6 ns       3.98 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/0                     17.3 ns       17.2 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/32                    22.5 ns       19.3 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/1024                  56.8 ns       33.5 ns
std::map<int, int>::insert_or_assign(key, value) (new value)/8192                  88.2 ns       41.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/0       16.6 ns       11.8 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/32      13.7 ns       30.7 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/1024    46.7 ns       49.1 ns
std::map<std::string, int>::insert_or_assign(key, value) (already present)/8192    41.9 ns       76.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/0             40.0 ns       40.5 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/32            38.9 ns       40.0 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/1024          84.9 ns       96.9 ns
std::map<std::string, int>::insert_or_assign(key, value) (new value)/8192           166 ns        149 ns

@philnik777 philnik777 force-pushed the optimize_insert_or_assign branch from 2dbac71 to 61e7688 Compare August 28, 2025 11:36
Comment on lines +279 to +285
if constexpr (is_multi_key_container) {
st.PauseTiming();
for (std::size_t i = 0; i != BatchSize; ++i) {
c[i].erase(inserted[i]);
}
st.ResumeTiming();
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is always false.

Suggested change
if constexpr (is_multi_key_container) {
st.PauseTiming();
for (std::size_t i = 0; i != BatchSize; ++i) {
c[i].erase(inserted[i]);
}
st.ResumeTiming();
}

@ldionne ldionne marked this pull request as ready for review August 28, 2025 16:09
@ldionne ldionne requested a review from a team as a code owner August 28, 2025 16:09
@llvmbot llvmbot added the libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi. label Aug 28, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 28, 2025

@llvm/pr-subscribers-libcxx

Author: Nikolas Klauser (philnik777)

Changes
Zen 2:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                            old             new
--------------------------------------------------------------------------------------------------------
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/0             1.62 ns         1.53 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/32            5.78 ns         5.99 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/1024          21.5 ns         15.4 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/8192          26.2 ns         20.5 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/0                   22.5 ns         21.1 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/32                  42.9 ns         28.4 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/1024                 118 ns         92.0 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/8192                 227 ns          173 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/0     13.2 ns         18.9 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/32    65.6 ns         39.0 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/1024   127 ns         64.4 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/8192   134 ns         71.4 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/0           45.6 ns         37.3 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/32           142 ns         93.3 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/1024         288 ns          147 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/8192         368 ns          182 ns

Apple M4:
--------------------------------------------------------------------------------------------------------
Benchmark                                                                              old           new
--------------------------------------------------------------------------------------------------------
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/0              0.784 ns      0.740 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/32              2.52 ns       1.77 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/1024            8.72 ns       4.06 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (already present)/8192            10.6 ns       3.98 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/0                     17.3 ns       17.2 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/32                    22.5 ns       19.3 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/1024                  56.8 ns       33.5 ns
std::map&lt;int, int&gt;::insert_or_assign(key, value) (new value)/8192                  88.2 ns       41.0 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/0       16.6 ns       11.8 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/32      13.7 ns       30.7 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/1024    46.7 ns       49.1 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (already present)/8192    41.9 ns       76.9 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/0             40.0 ns       40.5 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/32            38.9 ns       40.0 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/1024          84.9 ns       96.9 ns
std::map&lt;std::string, int&gt;::insert_or_assign(key, value) (new value)/8192           166 ns        149 ns

Full diff: https://github.com/llvm/llvm-project/pull/155816.diff

3 Files Affected:

  • (modified) libcxx/docs/ReleaseNotes/22.rst (+1)
  • (modified) libcxx/include/map (+10-12)
  • (modified) libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h (+51)
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 4ad5452cc29cc..cd8171dc5d1e8 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -57,6 +57,7 @@ Improvements and New Features
   has been improved by up to 3x
 - The performance of ``insert(iterator, iterator)`` of ``multimap`` and ``multiset`` has been improved by up to 2.5x
 - The performance of ``erase(iterator, iterator)`` in the unordered containers has been improved by up to 1.9x
+- The performance of ``map::insert_or_assign`` has been improved by up to 2x
 
 - ``ofstream::write`` has been optimized to pass through large strings to system calls directly instead of copying them
   in chunks into a buffer.
diff --git a/libcxx/include/map b/libcxx/include/map
index b53e1c4213487..395b434fe4a5b 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -1144,22 +1144,20 @@ public:
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __k, _Vp&& __v) {
-    iterator __p = lower_bound(__k);
-    if (__p != end() && !key_comp()(__k, __p->first)) {
-      __p->second = std::forward<_Vp>(__v);
-      return std::make_pair(__p, false);
-    }
-    return std::make_pair(emplace_hint(__p, __k, std::forward<_Vp>(__v)), true);
+    auto __result              = __tree_.__emplace_unique(__k, std::forward<_Vp>(__v));
+    auto& [__iter, __inserted] = __result;
+    if (!__inserted)
+      __iter->second = std::forward<_Vp>(__v);
+    return __result;
   }
 
   template <class _Vp>
   _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __k, _Vp&& __v) {
-    iterator __p = lower_bound(__k);
-    if (__p != end() && !key_comp()(__k, __p->first)) {
-      __p->second = std::forward<_Vp>(__v);
-      return std::make_pair(__p, false);
-    }
-    return std::make_pair(emplace_hint(__p, std::move(__k), std::forward<_Vp>(__v)), true);
+    auto __result              = __tree_.__emplace_unique(std::move(__k), std::forward<_Vp>(__v));
+    auto& [__iter, __inserted] = __result;
+    if (!__inserted)
+      __iter->second = std::forward<_Vp>(__v);
+    return __result;
   }
 
   template <class _Vp>
diff --git a/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h b/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
index 535a52f0a08ab..3daa7e1843f5c 100644
--- a/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
+++ b/libcxx/test/benchmarks/containers/associative/associative_container_benchmarks.h
@@ -259,6 +259,57 @@ void associative_container_benchmarks(std::string container) {
     }
   });
 
+  if constexpr (is_map_like && !is_multi_key_container) {
+    bench("insert_or_assign(key, value) (already present)", [=](auto& st) {
+      const std::size_t size = st.range(0) ? st.range(0) : 1;
+      std::vector<Value> in  = make_value_types(generate_unique_keys(size));
+      Value to_insert        = in[in.size() / 2]; // pick any existing value
+      std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+      typename Container::iterator inserted[BatchSize];
+
+      while (st.KeepRunningBatch(BatchSize)) {
+        for (std::size_t i = 0; i != BatchSize; ++i) {
+          inserted[i] =
+              adapt_operations<Container>::get_iterator(c[i].insert_or_assign(to_insert.first, to_insert.second));
+          benchmark::DoNotOptimize(inserted[i]);
+          benchmark::DoNotOptimize(c[i]);
+          benchmark::ClobberMemory();
+        }
+
+        if constexpr (is_multi_key_container) {
+          st.PauseTiming();
+          for (std::size_t i = 0; i != BatchSize; ++i) {
+            c[i].erase(inserted[i]);
+          }
+          st.ResumeTiming();
+        }
+      }
+    });
+
+    bench("insert_or_assign(key, value) (new value)", [=](auto& st) {
+      const std::size_t size = st.range(0);
+      std::vector<Value> in  = make_value_types(generate_unique_keys(size + 1));
+      Value to_insert        = in.back();
+      in.pop_back();
+      std::vector<Container> c(BatchSize, Container(in.begin(), in.end()));
+
+      while (st.KeepRunningBatch(BatchSize)) {
+        for (std::size_t i = 0; i != BatchSize; ++i) {
+          auto result = c[i].insert_or_assign(to_insert.first, to_insert.second);
+          benchmark::DoNotOptimize(result);
+          benchmark::DoNotOptimize(c[i]);
+          benchmark::ClobberMemory();
+        }
+
+        st.PauseTiming();
+        for (std::size_t i = 0; i != BatchSize; ++i) {
+          c[i].erase(get_key(to_insert));
+        }
+        st.ResumeTiming();
+      }
+    });
+  }
+
   // The insert(hint, ...) methods are only relevant for ordered containers, and we lack
   // a good way to compute a hint for unordered ones.
   if constexpr (is_ordered_container) {

Copy link
Member

@ldionne ldionne left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with a minor comment, and also please add a short explanation of the optimization in the commit message. This is great!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi. performance
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants