If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch, which uses equality comparison (and is vectorized). Additionally, if the type is trivially lexicographically comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch. Benchmarks: ``` ------------------------------------------------------------------------------------- Benchmark old new ------------------------------------------------------------------------------------- bm_lexicographical_compare<unsigned char>/1 1.17 ns 2.34 ns bm_lexicographical_compare<unsigned char>/2 1.64 ns 2.57 ns bm_lexicographical_compare<unsigned char>/3 2.23 ns 2.58 ns bm_lexicographical_compare<unsigned char>/4 2.82 ns 2.57 ns bm_lexicographical_compare<unsigned char>/5 3.34 ns 2.11 ns bm_lexicographical_compare<unsigned char>/6 3.94 ns 2.21 ns bm_lexicographical_compare<unsigned char>/7 4.56 ns 2.11 ns bm_lexicographical_compare<unsigned char>/8 5.25 ns 2.11 ns bm_lexicographical_compare<unsigned char>/16 9.88 ns 2.11 ns bm_lexicographical_compare<unsigned char>/64 38.9 ns 2.36 ns bm_lexicographical_compare<unsigned char>/512 317 ns 6.54 ns bm_lexicographical_compare<unsigned char>/4096 2517 ns 41.4 ns bm_lexicographical_compare<unsigned char>/32768 20052 ns 488 ns bm_lexicographical_compare<unsigned char>/262144 159579 ns 4409 ns bm_lexicographical_compare<unsigned char>/1048576 640456 ns 20342 ns bm_lexicographical_compare<signed char>/1 1.18 ns 2.37 ns bm_lexicographical_compare<signed char>/2 1.65 ns 2.60 ns bm_lexicographical_compare<signed char>/3 2.23 ns 2.83 ns bm_lexicographical_compare<signed char>/4 2.81 ns 3.06 ns bm_lexicographical_compare<signed char>/5 3.35 ns 3.30 ns 
bm_lexicographical_compare<signed char>/6 3.90 ns 3.99 ns bm_lexicographical_compare<signed char>/7 4.56 ns 3.78 ns bm_lexicographical_compare<signed char>/8 5.20 ns 4.02 ns bm_lexicographical_compare<signed char>/16 9.80 ns 6.21 ns bm_lexicographical_compare<signed char>/64 39.0 ns 3.16 ns bm_lexicographical_compare<signed char>/512 318 ns 7.58 ns bm_lexicographical_compare<signed char>/4096 2514 ns 47.4 ns bm_lexicographical_compare<signed char>/32768 20096 ns 504 ns bm_lexicographical_compare<signed char>/262144 156617 ns 4146 ns bm_lexicographical_compare<signed char>/1048576 624265 ns 19810 ns bm_lexicographical_compare<int>/1 1.15 ns 2.12 ns bm_lexicographical_compare<int>/2 1.60 ns 2.36 ns bm_lexicographical_compare<int>/3 2.21 ns 2.59 ns bm_lexicographical_compare<int>/4 2.74 ns 2.83 ns bm_lexicographical_compare<int>/5 3.26 ns 3.06 ns bm_lexicographical_compare<int>/6 3.81 ns 4.53 ns bm_lexicographical_compare<int>/7 4.41 ns 4.72 ns bm_lexicographical_compare<int>/8 5.08 ns 2.36 ns bm_lexicographical_compare<int>/16 9.54 ns 3.08 ns bm_lexicographical_compare<int>/64 37.8 ns 4.71 ns bm_lexicographical_compare<int>/512 309 ns 24.6 ns bm_lexicographical_compare<int>/4096 2422 ns 204 ns bm_lexicographical_compare<int>/32768 19362 ns 1947 ns bm_lexicographical_compare<int>/262144 155727 ns 19793 ns bm_lexicographical_compare<int>/1048576 623614 ns 80180 ns bm_ranges_lexicographical_compare<unsigned char>/1 1.07 ns 2.35 ns bm_ranges_lexicographical_compare<unsigned char>/2 1.72 ns 2.13 ns bm_ranges_lexicographical_compare<unsigned char>/3 2.46 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/4 3.17 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/5 3.86 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/6 4.55 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/7 5.25 ns 2.12 ns bm_ranges_lexicographical_compare<unsigned char>/8 5.95 ns 2.13 ns bm_ranges_lexicographical_compare<unsigned char>/16 11.7 ns 2.13 ns 
bm_ranges_lexicographical_compare<unsigned char>/64 45.5 ns 2.36 ns bm_ranges_lexicographical_compare<unsigned char>/512 366 ns 6.35 ns bm_ranges_lexicographical_compare<unsigned char>/4096 2886 ns 40.9 ns bm_ranges_lexicographical_compare<unsigned char>/32768 23054 ns 489 ns bm_ranges_lexicographical_compare<unsigned char>/262144 185302 ns 4339 ns bm_ranges_lexicographical_compare<unsigned char>/1048576 741576 ns 19430 ns bm_ranges_lexicographical_compare<signed char>/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare<signed char>/2 1.66 ns 2.35 ns bm_ranges_lexicographical_compare<signed char>/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare<signed char>/4 2.82 ns 2.82 ns bm_ranges_lexicographical_compare<signed char>/5 3.34 ns 3.06 ns bm_ranges_lexicographical_compare<signed char>/6 3.92 ns 3.99 ns bm_ranges_lexicographical_compare<signed char>/7 4.64 ns 4.10 ns bm_ranges_lexicographical_compare<signed char>/8 5.21 ns 4.61 ns bm_ranges_lexicographical_compare<signed char>/16 9.79 ns 7.42 ns bm_ranges_lexicographical_compare<signed char>/64 38.9 ns 2.93 ns bm_ranges_lexicographical_compare<signed char>/512 317 ns 7.31 ns bm_ranges_lexicographical_compare<signed char>/4096 2500 ns 47.5 ns bm_ranges_lexicographical_compare<signed char>/32768 19940 ns 496 ns bm_ranges_lexicographical_compare<signed char>/262144 159166 ns 4393 ns bm_ranges_lexicographical_compare<signed char>/1048576 638206 ns 19786 ns bm_ranges_lexicographical_compare<int>/1 1.10 ns 2.12 ns bm_ranges_lexicographical_compare<int>/2 1.64 ns 3.04 ns bm_ranges_lexicographical_compare<int>/3 2.23 ns 2.58 ns bm_ranges_lexicographical_compare<int>/4 2.81 ns 2.81 ns bm_ranges_lexicographical_compare<int>/5 3.35 ns 3.05 ns bm_ranges_lexicographical_compare<int>/6 3.94 ns 4.60 ns bm_ranges_lexicographical_compare<int>/7 4.60 ns 4.81 ns bm_ranges_lexicographical_compare<int>/8 5.19 ns 2.35 ns bm_ranges_lexicographical_compare<int>/16 9.85 ns 2.87 ns bm_ranges_lexicographical_compare<int>/64 38.9 ns 4.70 ns 
bm_ranges_lexicographical_compare<int>/512 318 ns 24.5 ns bm_ranges_lexicographical_compare<int>/4096 2494 ns 202 ns bm_ranges_lexicographical_compare<int>/32768 20000 ns 1939 ns bm_ranges_lexicographical_compare<int>/262144 160433 ns 19730 ns bm_ranges_lexicographical_compare<int>/1048576 642636 ns 80760 ns ```
235 lines
9.2 KiB
C++
235 lines
9.2 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP___STRING_CONSTEXPR_C_FUNCTIONS_H
|
|
#define _LIBCPP___STRING_CONSTEXPR_C_FUNCTIONS_H
|
|
|
|
#include <__config>
|
|
#include <__memory/addressof.h>
|
|
#include <__memory/construct_at.h>
|
|
#include <__type_traits/datasizeof.h>
|
|
#include <__type_traits/is_always_bitcastable.h>
|
|
#include <__type_traits/is_assignable.h>
|
|
#include <__type_traits/is_constant_evaluated.h>
|
|
#include <__type_traits/is_constructible.h>
|
|
#include <__type_traits/is_equality_comparable.h>
|
|
#include <__type_traits/is_same.h>
|
|
#include <__type_traits/is_trivially_copyable.h>
|
|
#include <__type_traits/is_trivially_lexicographically_comparable.h>
|
|
#include <__type_traits/remove_cv.h>
|
|
#include <__utility/is_pointer_in_range.h>
|
|
#include <cstddef>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
// Strongly-typed wrapper around size_t. Functions in this header take it as a parameter
// to make explicit that the integer is a number of elements, as opposed to a number of bytes.
// It has no enumerators; values are created and read back with static_cast.
enum class __element_count : size_t {};
|
|
|
|
template <class _Tp>
|
|
inline const bool __is_char_type = false;
|
|
|
|
template <>
|
|
inline const bool __is_char_type<char> = true;
|
|
|
|
#ifndef _LIBCPP_HAS_NO_CHAR8_T
|
|
template <>
|
|
inline const bool __is_char_type<char8_t> = true;
|
|
#endif
|
|
|
|
template <class _Tp>
|
|
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 size_t __constexpr_strlen(const _Tp* __str) _NOEXCEPT {
|
|
static_assert(__is_char_type<_Tp>, "__constexpr_strlen only works with char and char8_t");
|
|
// GCC currently doesn't support __builtin_strlen for heap-allocated memory during constant evaluation.
|
|
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70816
|
|
if (__libcpp_is_constant_evaluated()) {
|
|
#if _LIBCPP_STD_VER >= 17 && defined(_LIBCPP_COMPILER_CLANG_BASED)
|
|
if constexpr (is_same_v<_Tp, char>)
|
|
return __builtin_strlen(__str);
|
|
#endif
|
|
size_t __i = 0;
|
|
for (; __str[__i] != '\0'; ++__i)
|
|
;
|
|
return __i;
|
|
}
|
|
return __builtin_strlen(reinterpret_cast<const char*>(__str));
|
|
}
|
|
|
|
// Because of __is_trivially_lexicographically_comparable_v we know that comparing the object representations is
|
|
// equivalent to a std::memcmp. Since we have multiple objects contiguously in memory, we can call memcmp once instead
|
|
// of invoking it on every object individually.
|
|
template <class _Tp, class _Up>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int
|
|
__constexpr_memcmp(const _Tp* __lhs, const _Up* __rhs, __element_count __n) {
|
|
static_assert(__is_trivially_lexicographically_comparable_v<_Tp, _Up>,
|
|
"_Tp and _Up have to be trivially lexicographically comparable");
|
|
|
|
auto __count = static_cast<size_t>(__n);
|
|
|
|
if (__libcpp_is_constant_evaluated()) {
|
|
#ifdef _LIBCPP_COMPILER_CLANG_BASED
|
|
if (sizeof(_Tp) == 1 && !is_same<_Tp, bool>::value)
|
|
return __builtin_memcmp(__lhs, __rhs, __count * sizeof(_Tp));
|
|
#endif
|
|
|
|
while (__count != 0) {
|
|
if (*__lhs < *__rhs)
|
|
return -1;
|
|
if (*__rhs < *__lhs)
|
|
return 1;
|
|
|
|
--__count;
|
|
++__lhs;
|
|
++__rhs;
|
|
}
|
|
return 0;
|
|
} else {
|
|
return __builtin_memcmp(__lhs, __rhs, __count * sizeof(_Tp));
|
|
}
|
|
}
|
|
|
|
// Because of __libcpp_is_trivially_equality_comparable we know that comparing the object representations is equivalent
|
|
// to a std::memcmp(...) == 0. Since we have multiple objects contiguously in memory, we can call memcmp once instead
|
|
// of invoking it on every object individually.
|
|
template <class _Tp, class _Up>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
|
|
__constexpr_memcmp_equal(const _Tp* __lhs, const _Up* __rhs, __element_count __n) {
|
|
static_assert(__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
|
|
"_Tp and _Up have to be trivially equality comparable");
|
|
|
|
auto __count = static_cast<size_t>(__n);
|
|
|
|
if (__libcpp_is_constant_evaluated()) {
|
|
#ifdef _LIBCPP_COMPILER_CLANG_BASED
|
|
if (sizeof(_Tp) == 1 && is_integral<_Tp>::value && !is_same<_Tp, bool>::value)
|
|
return __builtin_memcmp(__lhs, __rhs, __count * sizeof(_Tp)) == 0;
|
|
#endif
|
|
while (__count != 0) {
|
|
if (*__lhs != *__rhs)
|
|
return false;
|
|
|
|
--__count;
|
|
++__lhs;
|
|
++__rhs;
|
|
}
|
|
return true;
|
|
} else {
|
|
return ::__builtin_memcmp(__lhs, __rhs, __count * sizeof(_Tp)) == 0;
|
|
}
|
|
}
|
|
|
|
template <class _Tp, class _Up>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __constexpr_memchr(_Tp* __str, _Up __value, size_t __count) {
|
|
static_assert(sizeof(_Tp) == 1 && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
|
|
"Calling memchr on non-trivially equality comparable types is unsafe.");
|
|
|
|
if (__libcpp_is_constant_evaluated()) {
|
|
// use __builtin_char_memchr to optimize constexpr evaluation if we can
|
|
#if _LIBCPP_STD_VER >= 17 && __has_builtin(__builtin_char_memchr)
|
|
if constexpr (is_same_v<remove_cv_t<_Tp>, char> && is_same_v<remove_cv_t<_Up>, char>)
|
|
return __builtin_char_memchr(__str, __value, __count);
|
|
#endif
|
|
|
|
for (; __count; --__count) {
|
|
if (*__str == __value)
|
|
return __str;
|
|
++__str;
|
|
}
|
|
return nullptr;
|
|
} else {
|
|
char __value_buffer = 0;
|
|
__builtin_memcpy(&__value_buffer, &__value, sizeof(char));
|
|
return static_cast<_Tp*>(__builtin_memchr(__str, __value_buffer, __count));
|
|
}
|
|
}
|
|
|
|
// This function performs an assignment to an existing, already alive TriviallyCopyable object
|
|
// from another TriviallyCopyable object.
|
|
//
|
|
// It basically works around the fact that TriviallyCopyable objects are not required to be
|
|
// syntactically copy/move constructible or copy/move assignable. Technically, only one of the
|
|
// four operations is required to be syntactically valid -- but at least one definitely has to
|
|
// be valid.
|
|
//
|
|
// This is necessary in order to implement __constexpr_memmove below in a way that mirrors as
|
|
// closely as possible what the compiler's __builtin_memmove is able to do.
|
|
template <class _Tp, class _Up, __enable_if_t<is_assignable<_Tp&, _Up const&>::value, int> = 0>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp& __assign_trivially_copyable(_Tp& __dest, _Up const& __src) {
|
|
__dest = __src;
|
|
return __dest;
|
|
}
|
|
|
|
// clang-format off
|
|
template <class _Tp, class _Up, __enable_if_t<!is_assignable<_Tp&, _Up const&>::value &&
|
|
is_assignable<_Tp&, _Up&&>::value, int> = 0>
|
|
// clang-format on
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp& __assign_trivially_copyable(_Tp& __dest, _Up& __src) {
|
|
__dest =
|
|
static_cast<_Up&&>(__src); // this is safe, we're not actually moving anything since the assignment is trivial
|
|
return __dest;
|
|
}
|
|
|
|
// clang-format off
|
|
template <class _Tp, class _Up, __enable_if_t<!is_assignable<_Tp&, _Up const&>::value &&
|
|
!is_assignable<_Tp&, _Up&&>::value &&
|
|
is_constructible<_Tp, _Up const&>::value, int> = 0>
|
|
// clang-format on
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& __assign_trivially_copyable(_Tp& __dest, _Up const& __src) {
|
|
// _Tp is trivially destructible, so we don't need to call its destructor to end the lifetime of the object
|
|
// that was there previously
|
|
std::__construct_at(std::addressof(__dest), __src);
|
|
return __dest;
|
|
}
|
|
|
|
// clang-format off
|
|
template <class _Tp, class _Up, __enable_if_t<!is_assignable<_Tp&, _Up const&>::value &&
|
|
!is_assignable<_Tp&, _Up&&>::value &&
|
|
!is_constructible<_Tp, _Up const&>::value &&
|
|
is_constructible<_Tp, _Up&&>::value, int> = 0>
|
|
// clang-format on
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& __assign_trivially_copyable(_Tp& __dest, _Up& __src) {
|
|
// _Tp is trivially destructible, so we don't need to call its destructor to end the lifetime of the object
|
|
// that was there previously
|
|
std::__construct_at(
|
|
std::addressof(__dest),
|
|
static_cast<_Up&&>(__src)); // this is safe, we're not actually moving anything since the constructor is trivial
|
|
return __dest;
|
|
}
|
|
|
|
template <class _Tp, class _Up, __enable_if_t<__is_always_bitcastable<_Up, _Tp>::value, int> = 0>
|
|
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
|
|
__constexpr_memmove(_Tp* __dest, _Up* __src, __element_count __n) {
|
|
size_t __count = static_cast<size_t>(__n);
|
|
if (__libcpp_is_constant_evaluated()) {
|
|
#ifdef _LIBCPP_COMPILER_CLANG_BASED
|
|
if (is_same<__remove_cv_t<_Tp>, __remove_cv_t<_Up> >::value) {
|
|
::__builtin_memmove(__dest, __src, __count * sizeof(_Tp));
|
|
return __dest;
|
|
}
|
|
#endif
|
|
if (std::__is_pointer_in_range(__src, __src + __count, __dest)) {
|
|
for (; __count > 0; --__count)
|
|
std::__assign_trivially_copyable(__dest[__count - 1], __src[__count - 1]);
|
|
} else {
|
|
for (size_t __i = 0; __i != __count; ++__i)
|
|
std::__assign_trivially_copyable(__dest[__i], __src[__i]);
|
|
}
|
|
} else if (__count > 0) {
|
|
::__builtin_memmove(__dest, __src, (__count - 1) * sizeof(_Tp) + __datasizeof_v<_Tp>);
|
|
}
|
|
return __dest;
|
|
}
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
#endif // _LIBCPP___STRING_CONSTEXPR_C_FUNCTIONS_H
|