Files
clang-p2996/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp
Nikolas Klauser 985c1a44f8 [libc++] Optimize the two range overload of mismatch (#86853)
```
-----------------------------------------------------------------------------
Benchmark                                                 old             new
-----------------------------------------------------------------------------
bm_mismatch_two_range_overload<char>/1               0.941 ns         1.88 ns
bm_mismatch_two_range_overload<char>/2                1.43 ns         2.15 ns
bm_mismatch_two_range_overload<char>/3                1.95 ns         2.55 ns
bm_mismatch_two_range_overload<char>/4                2.58 ns         2.90 ns
bm_mismatch_two_range_overload<char>/5                3.75 ns         3.31 ns
bm_mismatch_two_range_overload<char>/6                5.00 ns         3.83 ns
bm_mismatch_two_range_overload<char>/7                5.59 ns         4.35 ns
bm_mismatch_two_range_overload<char>/8                6.37 ns         4.84 ns
bm_mismatch_two_range_overload<char>/16               11.8 ns         6.72 ns
bm_mismatch_two_range_overload<char>/64               45.5 ns         2.59 ns
bm_mismatch_two_range_overload<char>/512               366 ns         12.6 ns
bm_mismatch_two_range_overload<char>/4096             2890 ns         91.6 ns
bm_mismatch_two_range_overload<char>/32768           23038 ns          758 ns
bm_mismatch_two_range_overload<char>/262144         142813 ns         6573 ns
bm_mismatch_two_range_overload<char>/1048576        366679 ns        26710 ns
bm_mismatch_two_range_overload<short>/1              0.934 ns         1.88 ns
bm_mismatch_two_range_overload<short>/2               1.30 ns         2.58 ns
bm_mismatch_two_range_overload<short>/3               1.76 ns         3.28 ns
bm_mismatch_two_range_overload<short>/4               2.24 ns         3.98 ns
bm_mismatch_two_range_overload<short>/5               2.80 ns         4.92 ns
bm_mismatch_two_range_overload<short>/6               3.58 ns         6.01 ns
bm_mismatch_two_range_overload<short>/7               4.29 ns         7.03 ns
bm_mismatch_two_range_overload<short>/8               4.67 ns         7.39 ns
bm_mismatch_two_range_overload<short>/16              9.86 ns         13.1 ns
bm_mismatch_two_range_overload<short>/64              38.9 ns         4.55 ns
bm_mismatch_two_range_overload<short>/512              348 ns         27.7 ns
bm_mismatch_two_range_overload<short>/4096            2881 ns          225 ns
bm_mismatch_two_range_overload<short>/32768          23111 ns         1715 ns
bm_mismatch_two_range_overload<short>/262144        184846 ns        14416 ns
bm_mismatch_two_range_overload<short>/1048576       742885 ns        57264 ns
bm_mismatch_two_range_overload<int>/1                0.838 ns         1.19 ns
bm_mismatch_two_range_overload<int>/2                 1.19 ns         1.65 ns
bm_mismatch_two_range_overload<int>/3                 1.83 ns         2.06 ns
bm_mismatch_two_range_overload<int>/4                 2.38 ns         2.42 ns
bm_mismatch_two_range_overload<int>/5                 3.60 ns         2.47 ns
bm_mismatch_two_range_overload<int>/6                 3.68 ns         3.05 ns
bm_mismatch_two_range_overload<int>/7                 4.32 ns         3.36 ns
bm_mismatch_two_range_overload<int>/8                 5.18 ns         3.58 ns
bm_mismatch_two_range_overload<int>/16                10.6 ns         2.84 ns
bm_mismatch_two_range_overload<int>/64                39.0 ns         7.78 ns
bm_mismatch_two_range_overload<int>/512                247 ns         53.9 ns
bm_mismatch_two_range_overload<int>/4096              1927 ns          429 ns
bm_mismatch_two_range_overload<int>/32768            15569 ns         3393 ns
bm_mismatch_two_range_overload<int>/262144          125413 ns        28504 ns
bm_mismatch_two_range_overload<int>/1048576         504549 ns       112729 ns
```
2024-04-01 18:21:51 +02:00

217 lines
6.8 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// <algorithm>
// template<InputIterator Iter1, InputIterator Iter2>
// requires HasEqualTo<Iter1::value_type, Iter2::value_type>
// constexpr pair<Iter1, Iter2> // constexpr after c++17
// mismatch(Iter1 first1, Iter1 last1, Iter2 first2);
//
// template<InputIterator Iter1, InputIterator Iter2>
// constexpr pair<Iter1, Iter2> // constexpr after c++17
// mismatch(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2); // C++14
//
// template<InputIterator Iter1, InputIterator Iter2,
// Predicate<auto, Iter1::value_type, Iter2::value_type> Pred>
// requires CopyConstructible<Pred>
// constexpr pair<Iter1, Iter2> // constexpr after c++17
// mismatch(Iter1 first1, Iter1 last1, Iter2 first2, Pred pred);
//
// template<InputIterator Iter1, InputIterator Iter2, Predicate Pred>
// constexpr pair<Iter1, Iter2> // constexpr after c++17
// mismatch(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2, Pred pred); // C++14
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=50000000
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=100000000
#include <algorithm>
#include <array>
#include <cassert>
#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
#include "type_algorithms.h"
// Runs every applicable std::mismatch overload on the two sequences and asserts
// that each one reports the expected position.
//
// Iter    - iterator type constructed from the raw pointers returned by data().
// lhs/rhs - the sequences to compare. They are taken by value: Iter is built
//           from the non-const data() pointer, and callers also hand in
//           containers of move-only elements through std::move.
// offset  - index both returned iterators are expected to point at, i.e. the
//           first differing element, or the length of the shorter range when
//           no element differs.
template <class Iter, class Container1, class Container2>
TEST_CONSTEXPR_CXX20 void check(Container1 lhs, Container2 rhs, size_t offset) {
  // The overloads without last2 read lhs.size() elements from the second range,
  // so they are valid whenever the second range is at least as long as the
  // first one (not only when both have the same length).
  if (lhs.size() <= rhs.size()) {
    assert(std::mismatch(Iter(lhs.data()), Iter(lhs.data() + lhs.size()), Iter(rhs.data())) ==
           std::make_pair(Iter(lhs.data() + offset), Iter(rhs.data() + offset)));
    assert(std::mismatch(Iter(lhs.data()),
                         Iter(lhs.data() + lhs.size()),
                         Iter(rhs.data()),
                         std::equal_to<typename Container1::value_type>()) ==
           std::make_pair(Iter(lhs.data() + offset), Iter(rhs.data() + offset)));
  }

#if TEST_STD_VER >= 14
  // The two-range overloads (C++14) stop at the end of the shorter range, so
  // they can be checked for any combination of lengths.
  assert(
      std::mismatch(Iter(lhs.data()), Iter(lhs.data() + lhs.size()), Iter(rhs.data()), Iter(rhs.data() + rhs.size())) ==
      std::make_pair(Iter(lhs.data() + offset), Iter(rhs.data() + offset)));
  assert(std::mismatch(Iter(lhs.data()),
                       Iter(lhs.data() + lhs.size()),
                       Iter(rhs.data()),
                       Iter(rhs.data() + rhs.size()),
                       std::equal_to<typename Container1::value_type>()) ==
         std::make_pair(Iter(lhs.data() + offset), Iter(rhs.data() + offset)));
#endif
}
// Element type with a user-provided move constructor, used to run the
// algorithm on something other than a plain integer.
struct NonTrivial {
  // Implicit on purpose: the tests brace-initialize arrays of NonTrivial from ints.
  TEST_CONSTEXPR_CXX20 NonTrivial(int value) : i_(value) {}

  // Takes over the value of `source` and resets `source` to 0.
  TEST_CONSTEXPR_CXX20 NonTrivial(NonTrivial&& source) : i_(source.i_) { source.i_ = 0; }

  // Two objects are equal when they hold the same value.
  TEST_CONSTEXPR_CXX20 friend bool operator==(const NonTrivial& a, const NonTrivial& b) { return a.i_ == b.i_; }

  int i_;
};
// Binary predicate that considers two ints equivalent when their remainders
// modulo 2 compare equal.
// NOTE(review): the call operator is non-const; confirm whether that is
// deliberate before const-qualifying it.
struct ModTwoComp {
  TEST_CONSTEXPR_CXX20 bool operator()(int lhs, int rhs) {
    const int lhs_remainder = lhs % 2;
    const int rhs_remainder = rhs % 2;
    return lhs_remainder == rhs_remainder;
  }
};
// Runs the fixed-size mismatch scenarios with Iter wrapping the element
// pointers. Always returns true; failures are reported by the asserts.
template <class Iter>
TEST_CONSTEXPR_CXX20 bool test() {
  typedef std::array<int, 0> Empty;
  typedef std::array<int, 2> Short;
  typedef std::array<int, 8> Long;

  { // both ranges are empty
    Empty first  = {};
    Empty second = {};
    check<Iter>(first, second, 0);
  }

  { // equally long ranges, every element matches
    Long first  = {0, 1, 2, 3, 0, 1, 2, 3};
    Long second = {0, 1, 2, 3, 0, 1, 2, 3};
    check<Iter>(first, second, 8);
  }

  { // equally long ranges, element 3 differs
    Long first  = {0, 1, 2, 2, 0, 1, 2, 3};
    Long second = {0, 1, 2, 3, 0, 1, 2, 3};
    check<Iter>(first, second, 3);
  }

  { // the second range ends first
    Long first   = {0, 1, 2, 2, 0, 1, 2, 3};
    Short second = {0, 1};
    check<Iter>(first, second, 2);
  }

  { // the first range ends first
    Short first = {0, 1};
    Long second = {0, 1, 2, 2, 0, 1, 2, 3};
    check<Iter>(first, second, 2);
  }

  { // custom predicate, called on raw pointers: 3 vs 4 at index 2 is the first pair it rejects
    std::array<int, 4> first  = {0, 2, 3, 4};
    std::array<int, 4> second = {0, 0, 4, 4};
    int* const first_begin    = first.data();
    int* const second_begin   = second.data();

    assert(std::mismatch(first_begin, first_begin + first.size(), second_begin, ModTwoComp()) ==
           std::make_pair(first_begin + 2, second_begin + 2));
#if TEST_STD_VER >= 14
    assert(std::mismatch(
               first_begin, first_begin + first.size(), second_begin, second_begin + second.size(), ModTwoComp()) ==
           std::make_pair(first_begin + 2, second_begin + 2));
#endif
  }

  return true;
}
// Functor passed to types::for_each: runs the templated test() for the
// iterator type it is invoked with.
struct Test {
  template <class Iter>
  TEST_CONSTEXPR_CXX20 void operator()() {
    // test<Iter>() always returns true; the asserts inside it do the checking.
    static_cast<void>(test<Iter>());
  }
};
// Entry point shared by the runtime run and the C++20 static_assert: runs the
// per-iterator scenarios for every type in cpp17_input_iterator_list<int*>,
// then repeats two scenarios with NonTrivial elements. Always returns true;
// failures are reported by the asserts.
TEST_CONSTEXPR_CXX20 bool test() {
  types::for_each(types::cpp17_input_iterator_list<int*>(), Test());

  typedef std::array<NonTrivial, 8> NonTrivialArray;

  { // non-integer element type to also cover the general case: no element differs
    NonTrivialArray first  = {1, 2, 3, 4, 5, 6, 7, 8};
    NonTrivialArray second = {1, 2, 3, 4, 5, 6, 7, 8};
    check<NonTrivial*>(std::move(first), std::move(second), 8);
  }

  { // non-integer element type to also cover the general case: element 4 differs
    NonTrivialArray first  = {1, 2, 3, 4, 7, 6, 7, 8};
    NonTrivialArray second = {1, 2, 3, 4, 5, 6, 7, 8};
    check<NonTrivial*>(std::move(first), std::move(second), 4);
  }

  return true;
}
int main(int, char**) {
test();
#if TEST_STD_VER >= 20
static_assert(test());
#endif
{ // check with a lot of elements to test the vectorization optimization
{
std::vector<char> lhs(256);
std::vector<char> rhs(256);
for (size_t i = 0; i != lhs.size(); ++i) {
lhs[i] = 1;
check<char*>(lhs, rhs, i);
lhs[i] = 0;
rhs[i] = 1;
check<char*>(lhs, rhs, i);
rhs[i] = 0;
}
}
{
std::vector<int> lhs(256);
std::vector<int> rhs(256);
for (size_t i = 0; i != lhs.size(); ++i) {
lhs[i] = 1;
check<int*>(lhs, rhs, i);
lhs[i] = 0;
rhs[i] = 1;
check<int*>(lhs, rhs, i);
rhs[i] = 0;
}
}
}
{ // check the tail of the vectorized loop
for (size_t vec_size = 1; vec_size != 256; ++vec_size) {
{
std::vector<char> lhs(vec_size);
std::vector<char> rhs(vec_size);
check<char*>(lhs, rhs, lhs.size());
lhs.back() = 1;
check<char*>(lhs, rhs, lhs.size() - 1);
lhs.back() = 0;
rhs.back() = 1;
check<char*>(lhs, rhs, lhs.size() - 1);
rhs.back() = 0;
}
{
std::vector<int> lhs(vec_size);
std::vector<int> rhs(vec_size);
check<int*>(lhs, rhs, lhs.size());
lhs.back() = 1;
check<int*>(lhs, rhs, lhs.size() - 1);
lhs.back() = 0;
rhs.back() = 1;
check<int*>(lhs, rhs, lhs.size() - 1);
rhs.back() = 0;
}
}
}
return 0;
}