See: https://github.com/llvm/llvm-project/issues/61987 Fix suggested by: @philnik and @var-const Reviewers: philnik, ldionne, EricWF, var-const Differential Revision: https://reviews.llvm.org/D147741 Testing: ninja check-cxx check-clang check-llvm Benchmark test cases (BM_CopyConstruct and BM_Assignment) added. Performance improvement: Run on (8 X 4800 MHz CPU s) CPU Caches: L1 Data 48 KiB (x4) L1 Instruction 32 KiB (x4) L2 Unified 1280 KiB (x4) L3 Unified 12288 KiB (x1) Load Average: 1.66, 3.02, 2.43 Comparing build-runtimes-base/libcxx/benchmarks/vector_operations.libcxx.out to build-runtimes/libcxx/benchmarks/vector_operations.libcxx.out Benchmark Time CPU Time Old Time New CPU Old CPU New ---------------------------------------------------------------------------------------------------------------------------------------- BM_ConstructSize/vector_byte/5140480 +0.0362 +0.0362 116906 121132 116902 121131 BM_CopyConstruct/vector_int/5140480 -0.4563 -0.4577 1755224 954241 1755330 951987 BM_Assignment/vector_int/5140480 -0.0222 -0.0220 990045 968095 989917 968125 BM_ConstructSizeValue/vector_byte/5140480 +0.0308 +0.0307 116970 120567 116977 120573 BM_ConstructIterIter/vector_char/1024 -0.0831 -0.0831 19 17 19 17 BM_ConstructIterIter/vector_size_t/1024 +0.0129 +0.0131 88 89 88 89 BM_ConstructIterIter/vector_string/1024 -0.0064 -0.0018 54455 54109 54208 54112 OVERALL_GEOMEAN -0.0845 -0.0842 0 0 0 0 FYI, the perf improvements for BM_CopyConstruct due to this patch are mostly subsumed by https://reviews.llvm.org/D149826. However, this patch still adds value by converting the copy to memmove (the second testcase).
Before the patch: ``` define linkonce_odr dso_local void @_ZNSt3__16vectorIiNS_9allocatorIiEEE18__construct_at_endIPiS5_EEvT_T0_m(ptr noundef nonnull align 8 dereferenceable(24) %0, ptr noundef %1, ptr noundef %2, i64 noundef %3) local_unnamed_addr #4 comdat align 2 { %5 = getelementptr inbounds %"class.std::__1::vector", ptr %0, i64 0, i32 1 %6 = load ptr, ptr %5, align 8, !tbaa !12 %7 = icmp eq ptr %1, %2 br i1 %7, label %16, label %8 8: ; preds = %4, %8 %9 = phi ptr [ %13, %8 ], [ %1, %4 ] %10 = phi ptr [ %14, %8 ], [ %6, %4 ] %11 = icmp ne ptr %10, null tail call void @llvm.assume(i1 %11) %12 = load i32, ptr %9, align 4, !tbaa !14 store i32 %12, ptr %10, align 4, !tbaa !14 %13 = getelementptr inbounds i32, ptr %9, i64 1 %14 = getelementptr inbounds i32, ptr %10, i64 1 %15 = icmp eq ptr %13, %2 br i1 %15, label %16, label %8, !llvm.loop !16 16: ; preds = %8, %4 %17 = phi ptr [ %6, %4 ], [ %14, %8 ] store ptr %17, ptr %5, align 8, !tbaa !12 ret void } ``` After the patch: ``` define linkonce_odr dso_local void @_ZNSt3__16vectorIiNS_9allocatorIiEEE18__construct_at_endIPiS5_EEvT_T0_m(ptr noundef nonnull align 8 dereferenceable(24) %0, ptr noundef %1, ptr noundef %2, i64 noundef %3) local_unnamed_addr #4 comdat align 2 { %5 = getelementptr inbounds %"class.std::__1::vector", ptr %0, i64 0, i32 1 %6 = load ptr, ptr %5, align 8, !tbaa !12 %7 = ptrtoint ptr %2 to i64 %8 = ptrtoint ptr %1 to i64 %9 = sub i64 %7, %8 %10 = ashr exact i64 %9, 2 tail call void @llvm.memmove.p0.p0.i64(ptr align 4 %6, ptr align 4 %1, i64 %9, i1 false) %11 = getelementptr inbounds i32, ptr %6, i64 %10 store ptr %11, ptr %5, align 8, !tbaa !12 ret void } ``` This is due to the optimized version of uninitialized_allocator_copy function.
177 lines
5.0 KiB
C++
177 lines
5.0 KiB
C++
// -*- C++ -*-
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef BENCHMARK_CONTAINER_BENCHMARKS_H
|
|
#define BENCHMARK_CONTAINER_BENCHMARKS_H
|
|
|
|
#include <cassert>
|
|
|
|
#include "Utilities.h"
|
|
#include "benchmark/benchmark.h"
|
|
|
|
namespace ContainerBenchmarks {
|
|
|
|
template <class Container>
|
|
void BM_ConstructSize(benchmark::State& st, Container) {
|
|
auto size = st.range(0);
|
|
for (auto _ : st) {
|
|
Container c(size);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_CopyConstruct(benchmark::State& st, Container) {
|
|
auto size = st.range(0);
|
|
Container c(size);
|
|
for (auto _ : st) {
|
|
auto v = c;
|
|
DoNotOptimizeData(v);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_Assignment(benchmark::State& st, Container) {
|
|
auto size = st.range(0);
|
|
Container c1;
|
|
Container c2(size);
|
|
for (auto _ : st) {
|
|
c1 = c2;
|
|
DoNotOptimizeData(c1);
|
|
DoNotOptimizeData(c2);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
|
|
const auto size = st.range(0);
|
|
for (auto _ : st) {
|
|
Container c(size, val);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto begin = in.begin();
|
|
const auto end = in.end();
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
Container c(begin, end);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
while (st.KeepRunning()) {
|
|
c.clear();
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.insert(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
while (st.KeepRunning()) {
|
|
c.clear();
|
|
c.rehash(16);
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.insert(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&c);
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.insert(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_EmplaceDuplicate(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&c);
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.emplace(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&(*c.begin()));
|
|
const auto end = in.data() + in.size();
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.data(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.find(*it)));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
|
|
c.rehash(8);
|
|
auto in = gen(st.range(0));
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&(*c.begin()));
|
|
const auto end = in.data() + in.size();
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.data(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.find(*it)));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_Rehash(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
c.max_load_factor(3.0);
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(c);
|
|
const auto bucket_count = c.bucket_count();
|
|
while (st.KeepRunning()) {
|
|
c.rehash(bucket_count + 1);
|
|
c.rehash(bucket_count);
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
} // end namespace ContainerBenchmarks
|
|
|
|
#endif // BENCHMARK_CONTAINER_BENCHMARKS_H
|