[libc++] Introduce __product_iterator_traits and optimise flat_map::insert (#139454)

Fixes #108624

This allows `flat_map::insert(Iter, Iter)` to forward directly to the
underlying containers' `insert(Iter, Iter)`, instead of inserting one
element at a time, when the input models a "product iterator". Currently,
`flat_map::iterator` and `zip_view::iterator` are "product iterator"s.

This gives almost a 10x speedup in my benchmark with -O3 (for both
before and after)

```cpp
Benchmark                                                          Time             CPU      Time Old      Time New       CPU Old       CPU New
-----------------------------------------------------------------------------------------------------------------------------------------------
flat_map::insert_product_iterator_flat_map/32                   -0.5028         -0.5320           149            74           149            70
flat_map::insert_product_iterator_flat_map/1024                 -0.8617         -0.8618          3113           430          3112           430
flat_map::insert_product_iterator_flat_map/8192                 -0.8877         -0.8877         26682          2995         26679          2995
flat_map::insert_product_iterator_flat_map/65536                -0.8769         -0.8769        226235         27844        226221         27841
flat_map::insert_product_iterator_zip/32                        -0.5844         -0.5844           162            67           162            67
flat_map::insert_product_iterator_zip/1024                      -0.8754         -0.8754          3427           427          3427           427
flat_map::insert_product_iterator_zip/8192                      -0.8934         -0.8934         28134          3000         28132          3000
flat_map::insert_product_iterator_zip/65536                     -0.8783         -0.8783        229783         27960        229767         27958
OVERALL_GEOMEAN                                                 -0.8319         -0.8332             0             0             0             0
```

---------

Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
This commit is contained in:
Hui
2025-06-28 13:42:50 +01:00
committed by GitHub
parent feb61f5b05
commit 34b2e934ea
12 changed files with 287 additions and 5 deletions

View File

@@ -81,6 +81,9 @@ Improvements and New Features
- The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets
with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively.
- The ``flat_map::insert`` and ``flat_set::insert_range`` have been optimized, resulting in a performance improvement of up
to 10x for inserting elements into a ``flat_map`` when the input range is a ``flat_map`` or a ``zip_view``.
Deprecations and Removals
-------------------------

View File

@@ -498,6 +498,7 @@ set(files
__iterator/ostreambuf_iterator.h
__iterator/permutable.h
__iterator/prev.h
__iterator/product_iterator.h
__iterator/projected.h
__iterator/ranges_iterator_traits.h
__iterator/readable_traits.h

View File

@@ -13,9 +13,12 @@
#include <__compare/three_way_comparable.h>
#include <__concepts/convertible_to.h>
#include <__config>
#include <__cstddef/size_t.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/product_iterator.h>
#include <__memory/addressof.h>
#include <__type_traits/conditional.h>
#include <__utility/forward.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -57,6 +60,8 @@ private:
template <class, class, class, bool>
friend struct __key_value_iterator;
friend struct __product_iterator_traits<__key_value_iterator>;
public:
using iterator_concept = random_access_iterator_tag;
// `__key_value_iterator` only satisfy "Cpp17InputIterator" named requirements, because
@@ -181,6 +186,29 @@ public:
}
};
// Product-iterator traits for flat_map's __key_value_iterator: the iterator is viewed as a
// product of exactly two underlying iterators, the key iterator (index 0) and the mapped
// iterator (index 1).
template <class _Owner, class _KeyContainer, class _MappedContainer, bool _Const>
struct __product_iterator_traits<__key_value_iterator<_Owner, _KeyContainer, _MappedContainer, _Const>> {
  static constexpr size_t __size = 2;

  // Returns the _Nth underlying iterator held by __it: __key_iter_ for 0, __mapped_iter_ for 1.
  template <size_t _Nth, class _Iter>
  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static decltype(auto) __get_iterator_element(_Iter&& __it)
    requires(_Nth <= 1)
  {
    if constexpr (_Nth != 0) {
      return std::forward<_Iter>(__it).__mapped_iter_;
    } else {
      return std::forward<_Iter>(__it).__key_iter_;
    }
  }

  // Builds a __key_value_iterator out of a key iterator and a mapped iterator.
  template <class _KeyIter, class _MappedIter>
  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto
  __make_product_iterator(_KeyIter&& __key_iter, _MappedIter&& __mapped_iter) {
    using _Result = __key_value_iterator<_Owner, _KeyContainer, _MappedContainer, _Const>;
    return _Result(std::forward<_KeyIter>(__key_iter), std::forward<_MappedIter>(__mapped_iter));
  }
};
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 23

View File

@@ -11,6 +11,7 @@
#define _LIBCPP___FLAT_MAP_UTILS_H
#include <__config>
#include <__iterator/product_iterator.h>
#include <__type_traits/container_traits.h>
#include <__utility/exception_guard.h>
#include <__utility/forward.h>
@@ -79,8 +80,6 @@ struct __flat_map_utils {
return typename _Map::iterator(std::move(__key_it), std::move(__mapped_it));
}
// TODO: We could optimize this, see
// https://github.com/llvm/llvm-project/issues/108624
template <class _Map, class _InputIterator, class _Sentinel>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static typename _Map::size_type
__append(_Map& __map, _InputIterator __first, _Sentinel __last) {
@@ -93,6 +92,25 @@ struct __flat_map_utils {
}
return __num_appended;
}
// Overload of __append selected when _InputIterator is a product iterator over exactly two
// underlying iterators (see __is_product_iterator_of_size). Instead of appending one
// key/value pair at a time, each underlying iterator range is passed to the matching
// container's range insert in a single call.
//
// Returns the number of elements appended to __map.__containers_.keys.
//
// NOTE(review): this presumes [__first, __last) covers the same number of elements in both
// underlying ranges -- confirm for every iterator type that models a product iterator.
template <class _Map, class _InputIterator>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static typename _Map::size_type
__append(_Map& __map, _InputIterator __first, _InputIterator __last)
  requires __is_product_iterator_of_size<_InputIterator, 2>::value
{
  using _Traits  = __product_iterator_traits<_InputIterator>;
  auto& __keys   = __map.__containers_.keys;
  auto& __values = __map.__containers_.values;

  // Remember the size before appending so the number of new elements can be reported.
  auto __old_size = __keys.size();
  __keys.insert(__keys.end(),
                _Traits::template __get_iterator_element<0>(__first),
                _Traits::template __get_iterator_element<0>(__last));
  __values.insert(__values.end(),
                  _Traits::template __get_iterator_element<1>(__first),
                  _Traits::template __get_iterator_element<1>(__last));
  return __keys.size() - __old_size;
}
};
_LIBCPP_END_NAMESPACE_STD

View File

@@ -0,0 +1,76 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ITERATOR_PRODUCT_ITERATOR_H
#define _LIBCPP___ITERATOR_PRODUCT_ITERATOR_H
// Product iterators are iterators that contain two or more underlying iterators.
//
// For example, std::flat_map stores its data into two separate containers, and its iterator
// is a proxy over two separate underlying iterators. The concept of product iterators
// allows algorithms to operate over these underlying iterators separately, opening the
// door to various optimizations.
//
// If __product_iterator_traits can be instantiated, it must provide the following static members:
// - static constexpr size_t Traits::__size
// The number of underlying iterators inside the product iterator.
//
// - template <size_t _N, class _Iter>
// static decltype(auto) Traits::__get_iterator_element(_Iter&& __it)
// Returns the _N-th underlying iterator of the given product iterator.
//
// - template <class... _Iters>
// static _Iterator Traits::__make_product_iterator(_Iters&&...);
// Creates a product iterator from the given underlying iterators.
#include <__config>
#include <__cstddef/size_t.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/integral_constant.h>
#include <__utility/declval.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
// Primary template, declared but not defined in this header. Iterator types take part by
// providing a specialization with the members sketched in the exposition-only block below.
template <class _Iterator>
struct __product_iterator_traits;
/* exposition-only:
{
static constexpr size_t __size = ...;
template <size_t _N, class _Iter>
static decltype(auto) __get_iterator_element(_Iter&&);
template <class... _Iters>
static _Iterator __make_product_iterator(_Iters&&...);
};
*/
// Detects whether _Tp is a product iterator. The partial specialization is only viable when
// sizeof(__product_iterator_traits<_Tp>) is well-formed, i.e. when a definition of the traits
// is available for _Tp; multiplying by 0 makes its argument match the primary template's
// default argument of 0.
template <class _Tp, size_t = 0>
struct __is_product_iterator : false_type {};
template <class _Tp>
struct __is_product_iterator<_Tp, sizeof(__product_iterator_traits<_Tp>) * 0> : true_type {};
// Detects whether _Tp is a product iterator made of exactly _Size underlying iterators, by
// comparing __product_iterator_traits<_Tp>::__size against _Size. The primary template
// (false_type) is used whenever that comparison is false or cannot be formed.
template <class _Tp, size_t _Size, class = void>
struct __is_product_iterator_of_size : false_type {};
template <class _Tp, size_t _Size>
struct __is_product_iterator_of_size<_Tp, _Size, __enable_if_t<__product_iterator_traits<_Tp>::__size == _Size> >
: true_type {};
// The type produced by __product_iterator_traits<_Iterator>::__get_iterator_element<_Nth> when
// it is called with std::declval<_Iterator>().
template <class _Iterator, size_t _Nth>
using __product_iterator_element_t _LIBCPP_NODEBUG =
decltype(__product_iterator_traits<_Iterator>::template __get_iterator_element<_Nth>(std::declval<_Iterator>()));
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ITERATOR_PRODUCT_ITERATOR_H

View File

@@ -23,6 +23,7 @@
#include <__iterator/iter_move.h>
#include <__iterator/iter_swap.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/product_iterator.h>
#include <__ranges/access.h>
#include <__ranges/all.h>
#include <__ranges/concepts.h>
@@ -251,6 +252,10 @@ class zip_view<_Views...>::__iterator : public __zip_view_iterator_category_base
friend class zip_view<_Views...>;
static constexpr bool __is_zip_view_iterator = true;
friend struct __product_iterator_traits<__iterator>;
public:
using iterator_concept = decltype(ranges::__get_zip_view_iterator_tag<_Const, _Views...>());
using value_type = tuple<range_value_t<__maybe_const<_Const, _Views>>...>;
@@ -468,6 +473,23 @@ inline constexpr auto zip = __zip::__fn{};
} // namespace views
} // namespace ranges
// Product-iterator traits for zip_view iterators. The constraint selects any type that exposes
// a static member __is_zip_view_iterator evaluating to true.
template <class _Iterator>
requires _Iterator::__is_zip_view_iterator
struct __product_iterator_traits<_Iterator> {
// Number of underlying iterators: the tuple size of the iterator's __current_ member.
static constexpr size_t __size = tuple_size<decltype(std::declval<_Iterator>().__current_)>::value;
// Returns element _Nth of __it.__current_ through std::get, forwarding __it's value category.
template <size_t _Nth, class _Iter>
requires(_Nth < __size)
_LIBCPP_HIDE_FROM_ABI static constexpr decltype(auto) __get_iterator_element(_Iter&& __it) {
return std::get<_Nth>(std::forward<_Iter>(__it).__current_);
}
// Builds an _Iterator from a std::tuple holding the given underlying iterators.
template <class... _Iters>
_LIBCPP_HIDE_FROM_ABI static constexpr _Iterator __make_product_iterator(_Iters&&... __iters) {
return _Iterator(std::tuple(std::forward<_Iters>(__iters)...));
}
};
#endif // _LIBCPP_STD_VER >= 23
_LIBCPP_END_NAMESPACE_STD

View File

@@ -1522,6 +1522,7 @@ module std [system] {
}
module permutable { header "__iterator/permutable.h" }
module prev { header "__iterator/prev.h" }
module product_iterator { header "__iterator/product_iterator.h" }
module projected { header "__iterator/projected.h" }
module ranges_iterator_traits { header "__iterator/ranges_iterator_traits.h" }
module readable_traits { header "__iterator/readable_traits.h" }

View File

@@ -13,12 +13,14 @@
#include <iterator>
#include <random>
#include <string>
#include <ranges>
#include <type_traits>
#include <utility>
#include <vector>
#include "benchmark/benchmark.h"
#include "../../GenerateInput.h"
#include "test_macros.h"
namespace support {
@@ -66,6 +68,8 @@ void associative_container_benchmarks(std::string container) {
static constexpr bool is_ordered_container = requires(Container c, Key k) { c.lower_bound(k); };
static constexpr bool is_map_like = requires { typename Container::mapped_type; };
// These benchmarks are structured to perform the operation being benchmarked
// a small number of times at each iteration, in order to offset the cost of
// PauseTiming() and ResumeTiming().
@@ -321,6 +325,48 @@ void associative_container_benchmarks(std::string container) {
}
});
// Range-insert benchmarks for map-like containers where the input iterators come either from a
// container of the same type or (C++23 and later) from a zip_view.
// NOTE(review): presumably meant to exercise the product-iterator path of flat_map::insert, as
// the benchmark names suggest -- other map-like containers run the same code.
if constexpr (is_map_like) {
bench("insert(iterator, iterator) (product_iterator from same type)", [=](auto& st) {
const std::size_t size = st.range(0);
std::vector<Value> in = make_value_types(generate_unique_keys(size + (size / 10)));
Container source(in.begin(), in.end());
Container c;
for ([[maybe_unused]] auto _ : st) {
c.insert(source.begin(), source.end());
benchmark::DoNotOptimize(c);
benchmark::ClobberMemory();
// Reset the destination with timing paused so that every iteration inserts into an empty container.
st.PauseTiming();
c = Container();
st.ResumeTiming();
}
});
#if TEST_STD_VER >= 23
bench("insert(iterator, iterator) (product_iterator from zip_view)", [=](auto& st) {
const std::size_t size = st.range(0);
std::vector<Key> keys = generate_unique_keys(size + (size / 10));
// The zipped input is ordered by key; the mapped values are value-initialized.
std::sort(keys.begin(), keys.end());
std::vector<typename Container::mapped_type> mapped(keys.size());
auto source = std::views::zip(keys, mapped);
Container c;
for ([[maybe_unused]] auto _ : st) {
c.insert(source.begin(), source.end());
benchmark::DoNotOptimize(c);
benchmark::ClobberMemory();
// Reset the destination with timing paused so that every iteration inserts into an empty container.
st.PauseTiming();
c = Container();
st.ResumeTiming();
}
});
#endif
}
/////////////////////////
// Erasure
/////////////////////////

View File

@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: std-at-least-c++26
// REQUIRES: std-at-least-c++23
#include <flat_map>
#include <utility>

View File

@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
// REQUIRES: std-at-least-c++26
// REQUIRES: std-at-least-c++23
#include <flat_map>

View File

@@ -0,0 +1,66 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// gcc 15 does not seem to recognize the __product_iterator_traits specializations
// UNSUPPORTED: gcc
#include <flat_map>
#include <ranges>
#include <type_traits>
#include <utility>
#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
// Exercises the libc++-internal __product_iterator_traits directly: element access on a
// zip_view iterator and construction of a flat_map iterator from its two parts.
constexpr bool test() {
  {
    // __get_iterator_element must work with a non-copyable zip iterator and yield a
    // reference to the underlying iterator.
    using InputIter = cpp20_input_iterator<int*>;
    int data[]      = {1, 2, 3, 4};
    InputIter first(data);
    sentinel_wrapper<InputIter> last{InputIter(data + 4)};
    std::ranges::subrange input(std::move(first), std::move(last));
    auto zipped = std::views::zip(std::move(input), std::views::iota(0, 4));
    auto zip_it = zipped.begin();

    using ZipIter = decltype(zip_it);
    using Traits  = std::__product_iterator_traits<ZipIter>;
    static_assert(!std::is_copy_constructible_v<ZipIter>);
    static_assert(Traits::__size == 2);

    std::same_as<InputIter&> decltype(auto) underlying = Traits::__get_iterator_element<0>(zip_it);
    assert(*underlying == 1);
  }

  if (!std::is_constant_evaluated()) {
    // __make_product_iterator assembles a flat_map const iterator from a key iterator
    // and a mapped iterator.
    using Map       = std::flat_map<int, int>;
    using ConstIter = std::ranges::iterator_t<const Map>;
    Map map{{1, 1}, {2, 2}, {3, 3}};

    const auto& key_container    = map.keys();
    const auto& mapped_container = map.values();
    auto key_it                  = std::ranges::begin(key_container);
    auto mapped_it               = std::ranges::begin(mapped_container);

    auto rebuilt = std::__product_iterator_traits<ConstIter>::__make_product_iterator(key_it, mapped_it);
    assert(rebuilt->first == 1);
    assert(rebuilt->second == 1);
  }

  return true;
}
// Runs the checks once at run time and once more during constant evaluation.
int main(int, char**) {
test();
static_assert(test());
return 0;
}

View File

@@ -18,6 +18,7 @@
#include <cassert>
#include <functional>
#include <deque>
#include <ranges>
#include "MinSequenceContainer.h"
#include "../helpers.h"
@@ -95,6 +96,26 @@ constexpr void test() {
});
}
// Range insert with the two iterator kinds that libc++ treats as product iterators:
// flat_map iterators and zip_view iterators.
constexpr void test_product_iterator() {
  using Map = std::flat_map<int, int>;

  // Source iterators taken from another flat_map.
  {
    Map dest{{1, 1}, {2, 1}, {3, 1}};
    Map src{{4, 1}, {5, 1}, {6, 1}};
    dest.insert(src.begin(), src.end());

    Map expected{{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}, {6, 1}};
    assert(dest == expected);
  }

  // Source iterators taken from a zip_view over two vectors.
  {
    std::vector<int> key_input{1, 2, 3};
    std::vector<int> mapped_input{1, 1, 1};
    auto zipped = std::views::zip(key_input, mapped_input);

    Map dest;
    dest.insert(zipped.begin(), zipped.end());

    Map expected{{1, 1}, {2, 1}, {3, 1}};
    assert(dest == expected);
  }
}
constexpr bool test() {
test<std::vector<int>, std::vector<double>>();
#ifndef __cpp_lib_constexpr_deque
@@ -105,7 +126,7 @@ constexpr bool test() {
}
test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
test_product_iterator();
if (!TEST_IS_CONSTANT_EVALUATED) {
auto insert_func = [](auto& m, const auto& newValues) { m.insert(newValues.begin(), newValues.end()); };
test_insert_range_exception_guarantee(insert_func);