//===-- Generic implementation of memory function building blocks ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides generic C++ building blocks.
// Depending on the requested size, the block operation uses unsigned integral
// types, vector types or an array of the type with the maximum size.
//
// The maximum size is passed as a template argument. For instance, on x86
// platforms that only support integral types the maximum size would be 8
// (corresponding to uint64_t). On this platform, if we request a size of 32,
// it would be treated as a cpp::array<uint64_t, 4>.
//
// On the other hand, if the platform is x86 with support for AVX, the maximum
// size is 32 and the operation can be handled with a single native operation.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H

#include "src/__support/CPP/array.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/common.h"
#include "src/__support/endian.h"
#include "src/__support/macros/optimization.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/utils.h"

#include <stdint.h>

static_assert((UINTPTR_MAX == 4294967295U) ||
                  (UINTPTR_MAX == 18446744073709551615UL),
              "We currently only support 32- or 64-bit platforms");

#if defined(UINT64_MAX)
#define LLVM_LIBC_HAS_UINT64
#endif

namespace LIBC_NAMESPACE {
// Compiler types using the vector attributes.
using generic_v128 = uint8_t __attribute__((__vector_size__(16)));
using generic_v256 = uint8_t __attribute__((__vector_size__(32)));
using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
} // namespace LIBC_NAMESPACE

namespace LIBC_NAMESPACE::generic {

// We accept three types of values as elements for generic operations:
// - scalar : unsigned integral types,
// - vector : compiler types using the vector attributes or platform builtins,
// - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
// The following traits help discriminate between these cases.

template <typename T> struct is_scalar : cpp::false_type {};
template <> struct is_scalar<uint8_t> : cpp::true_type {};
template <> struct is_scalar<uint16_t> : cpp::true_type {};
template <> struct is_scalar<uint32_t> : cpp::true_type {};
#ifdef LLVM_LIBC_HAS_UINT64
template <> struct is_scalar<uint64_t> : cpp::true_type {};
#endif // LLVM_LIBC_HAS_UINT64
template <typename T> constexpr bool is_scalar_v = is_scalar<T>::value;

template <typename T> struct is_vector : cpp::false_type {};
template <> struct is_vector<generic_v128> : cpp::true_type {};
template <> struct is_vector<generic_v256> : cpp::true_type {};
template <> struct is_vector<generic_v512> : cpp::true_type {};
template <typename T> constexpr bool is_vector_v = is_vector<T>::value;

template <class T> struct is_array : cpp::false_type {};
template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
};
template <typename T> constexpr bool is_array_v = is_array<T>::value;

template <typename T>
constexpr bool is_element_type_v =
    is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;

// Helper struct to retrieve the number of elements of an array.
template <class T> struct array_size {};
template <class T, size_t N>
struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
template <typename T> constexpr size_t array_size_v = array_size<T>::value;
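
// Illustrative sanity checks (an expository sketch, not part of the original
// interface): these spell out how the three element categories and array_size
// compose for a concrete array type.
static_assert(is_scalar_v<uint32_t>);
static_assert(is_vector_v<generic_v128>);
static_assert(is_array_v<cpp::array<uint16_t, 4>>);
static_assert(is_element_type_v<cpp::array<uint16_t, 4>>);
static_assert(array_size_v<cpp::array<uint16_t, 4>> == 4);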

// Generic operations for the above type categories.

template <typename T> T load(CPtr src) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    return ::LIBC_NAMESPACE::load<T>(src);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    T Value;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      Value[I] = load<value_type>(src + (I * sizeof(value_type)));
    return Value;
  }
}

template <typename T> void store(Ptr dst, T value) {
  static_assert(is_element_type_v<T>);
  if constexpr (is_scalar_v<T> || is_vector_v<T>) {
    ::LIBC_NAMESPACE::store<T>(dst, value);
  } else if constexpr (is_array_v<T>) {
    using value_type = typename T::value_type;
    for (size_t I = 0; I < array_size_v<T>; ++I)
      store<value_type>(dst + (I * sizeof(value_type)), value[I]);
  }
}
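
// For illustration (exposition only): with T = cpp::array<uint16_t, 2>, the
// functions above decompose into two 2-byte operations at offsets 0 and 2:
//   auto v = load<cpp::array<uint16_t, 2>>(src); // loads src[0..2), src[2..4)
//   store(dst, v);                               // stores them back in order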

template <typename T> T splat(uint8_t value) {
  static_assert(is_scalar_v<T> || is_vector_v<T>);
  if constexpr (is_scalar_v<T>)
    return T(~0) / T(0xFF) * T(value);
  else if constexpr (is_vector_v<T>) {
    T Out;
    // This for loop is optimized out for vector types.
    for (size_t i = 0; i < sizeof(T); ++i)
      Out[i] = value;
    return Out;
  }
}
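
// A concrete trace of the scalar branch (exposition only): for T = uint32_t,
// T(~0) / T(0xFF) is 0xFFFFFFFF / 0xFF == 0x01010101, so splat<uint32_t>(0xAB)
// computes 0x01010101 * 0xAB == 0xABABABAB, i.e. the byte replicated into
// every byte position of the scalar.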

///////////////////////////////////////////////////////////////////////////////
// Memset
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memset {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    if constexpr (is_scalar_v<T> || is_vector_v<T>) {
      store<T>(dst, splat<T>(value));
    } else if constexpr (is_array_v<T>) {
      using value_type = typename T::value_type;
      const auto Splat = splat<value_type>(value);
      for (size_t I = 0; I < array_size_v<T>; ++I)
        store<value_type>(dst + (I * sizeof(value_type)), Splat);
    }
  }

  LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
    block(dst + count - SIZE, value);
  }

  LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
    block(dst, value);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail_offset(Ptr dst, uint8_t value,
                                               size_t count, size_t offset) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    do {
      block(dst + offset, value);
      offset += SIZE;
    } while (offset < count - SIZE);
    tail(dst, value, count);
  }

  LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
    return loop_and_tail_offset(dst, value, count, 0);
  }
};
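
// Usage sketch (exposition only): Memset<uint64_t>::block writes exactly 8
// bytes, tail writes the last 8 bytes of the buffer, and head_tail covers any
// count in [8, 16] with two possibly overlapping 8-byte stores. For example,
//   Memset<uint64_t>::head_tail(dst, 0, 12);
// stores to [dst, dst + 8) and [dst + 4, dst + 12).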

template <typename T, typename... TS> struct MemsetSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static void block(Ptr dst, uint8_t value) {
    Memset<T>::block(dst, value);
    if constexpr (sizeof...(TS) > 0)
      return MemsetSequence<TS...>::block(dst + sizeof(T), value);
  }
};
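
// For instance (an expository sketch, not part of the original interface), a
// fixed 7-byte memset can be expressed as adjacent power-of-two blocks
// covering bytes [0, 4), then [4, 6), then [6, 7):
static_assert(MemsetSequence<uint32_t, uint16_t, uint8_t>::SIZE == 7);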

///////////////////////////////////////////////////////////////////////////////
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <typename T> struct Memmove {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static void block(Ptr dst, CPtr src) {
    store<T>(dst, load<T>(src));
  }

  LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
    const size_t offset = count - SIZE;
    // The load and store operations can be performed in any order as long as
    // they are not interleaved. More investigations are needed to determine
    // the best order.
    const auto head = load<T>(src);
    const auto tail = load<T>(src + offset);
    store<T>(dst, head);
    store<T>(dst + offset, tail);
  }

  // Align forward is suitable when dst < src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes forward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [___________LLLLLLLA______________]
  // [_SSSSSSSS________________________]
  // [________SSSSSSSS_________________]
  //
  // e.g. Moving two bytes forward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
  // [____LLLLLLLL_____________________]
  // [______LLLLLLLL___________________]
  // [_SSSSSSSS________________________]
  // [___SSSSSSSA______________________]
  template <Arg AlignOn>
  LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr prev_dst = dst;
    CPtr prev_src = src;
    size_t prev_count = count;
    align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
    adjust(SIZE, dst, src, count);
    head_tail(prev_dst, prev_src, prev_count - count);
  }

  // Align backward is suitable when dst > src. The alignment is performed with
  // a HeadTail operation of count ∈ [Alignment, 2 x Alignment].
  //
  // e.g. Moving two bytes backward, we make sure src is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _________________ALLLLLLL_______]
  // [ ___________________LLLLLLLL_____]
  // [____________________SSSSSSSS_____]
  // [______________________SSSSSSSS___]
  //
  // e.g. Moving two bytes backward, we make sure dst is aligned.
  // [  |       |       |       |      ]
  // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
  // [ _______________LLLLLLLL_________]
  // [ ___________________LLLLLLLL_____]
  // [__________________ASSSSSSS_______]
  // [______________________SSSSSSSS___]
  template <Arg AlignOn>
  LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
    Ptr headtail_dst = dst + count;
    CPtr headtail_src = src + count;
    size_t headtail_size = 0;
    align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
                                          headtail_size);
    adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
    head_tail(headtail_dst, headtail_src, headtail_size);
    count -= headtail_size;
  }

  // Move forward is suitable when dst < src. We load the tail bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [_________________________LLLLLLLL___]
  // [___LLLLLLLL_________________________]
  // [_SSSSSSSS___________________________]
  // [___________LLLLLLLL_________________]
  // [_________SSSSSSSS___________________]
  // [___________________LLLLLLLL_________]
  // [_________________SSSSSSSS___________]
  // [_______________________SSSSSSSS_____]
  LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
                                                size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const size_t tail_offset = count - SIZE;
    const auto tail_value = load<T>(src + tail_offset);
    size_t offset = 0;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset += SIZE;
    } while (offset < count - SIZE);
    store<T>(dst + tail_offset, tail_value);
  }

  // Move backward is suitable when dst > src. We load the head bytes before
  // handling the loop.
  //
  // e.g. Moving two bytes
  // [   |       |       |       |       |]
  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
  // [___LLLLLLLL_________________________]
  // [_________________________LLLLLLLL___]
  // [___________________________SSSSSSSS_]
  // [_________________LLLLLLLL___________]
  // [___________________SSSSSSSS_________]
  // [_________LLLLLLLL___________________]
  // [___________SSSSSSSS_________________]
  // [_____SSSSSSSS_______________________]
  LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
                                                 size_t count) {
    static_assert(SIZE > 1, "a loop of size 1 does not need tail");
    const auto head_value = load<T>(src);
    ptrdiff_t offset = count - SIZE;
    LIBC_LOOP_NOUNROLL
    do {
      block(dst + offset, src + offset);
      offset -= SIZE;
    } while (offset >= 0);
    store<T>(dst, head_value);
  }
};
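
// Dispatch sketch (hypothetical, exposition only; the actual dispatch lives
// in the memmove implementation files): when the ranges may overlap, the
// forward variants are safe for dst < src and the backward variants for
// dst > src, e.g.
//   if (dst < src)
//     Memmove<uint64_t>::loop_and_tail_forward(dst, src, count);
//   else if (dst > src)
//     Memmove<uint64_t>::loop_and_tail_backward(dst, src, count);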

///////////////////////////////////////////////////////////////////////////////
// Low level operations for Bcmp and Memcmp that operate on memory locations.
///////////////////////////////////////////////////////////////////////////////

// Same as load above but with an offset to the pointer.
// Making the offset explicit hints the compiler to use the relevant
// addressing modes consistently.
template <typename T> LIBC_INLINE T load(CPtr ptr, size_t offset) {
  return ::LIBC_NAMESPACE::load<T>(ptr + offset);
}

// Same as above but also makes sure the loaded value is in big endian format.
// This is useful when implementing lexicographic comparisons as big endian
// scalar comparison directly maps to lexicographic byte comparisons.
template <typename T> LIBC_INLINE T load_be(CPtr ptr, size_t offset) {
  return Endian::to_big_endian(load<T>(ptr, offset));
}
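
// Worked example (exposition only): on a little endian machine, the byte
// sequences "ab" (0x61 0x62) and "ba" (0x62 0x61) load natively as 0x6261 and
// 0x6162, which compare in the wrong order. load_be<uint16_t> byte-swaps them
// to 0x6162 and 0x6261, so the plain integer comparison 0x6162 < 0x6261
// matches the lexicographic byte order "ab" < "ba".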

// Equality: returns true iff values at locations (p1 + offset) and (p2 +
// offset) compare equal.
template <typename T> LIBC_INLINE bool eq(CPtr p1, CPtr p2, size_t offset);

// Not equals: returns non-zero iff values at locations (p1 + offset) and (p2 +
// offset) differ.
template <typename T> LIBC_INLINE uint32_t neq(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison:
// - returns 0 iff values at locations (p1 + offset) and (p2 + offset) compare
//   equal.
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp(CPtr p1, CPtr p2, size_t offset);

// Lexicographic comparison of non-equal values:
// - returns a negative value if value at location (p1 + offset) is
//   lexicographically less than value at (p2 + offset).
// - returns a positive value if value at location (p1 + offset) is
//   lexicographically greater than value at (p2 + offset).
template <typename T>
LIBC_INLINE MemcmpReturnType cmp_neq(CPtr p1, CPtr p2, size_t offset);

///////////////////////////////////////////////////////////////////////////////
// Memcmp implementation
//
// When building memcmp, not all types are considered equal.
//
// For instance, the lexicographic comparison of two uint8_t can be implemented
// as a simple subtraction, but for wider operations the logic can be much more
// involved, especially on little endian platforms.
//
// For such wider types it is a good strategy to test for equality first and
// only do the expensive lexicographic comparison if necessary.
//
// Decomposing the algorithm like this for wider types allows us to have
// efficient implementation of higher order functions like 'head_tail' or
// 'loop_and_tail'.
///////////////////////////////////////////////////////////////////////////////

// Type traits to decide whether we can use 'cmp' directly or if we need to
// split the computation.
template <typename T> struct cmp_is_expensive;
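
// A hypothetical specialization for a wide type (a sketch, not part of this
// file): a little endian platform would opt a 32-bit element into the split
// strategy with
//   template <> struct cmp_is_expensive<uint32_t> : cpp::true_type {};
// and implement cmp_neq<uint32_t> by comparing the big endian images of both
// values, e.g. obtained via load_be<uint32_t>.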

template <typename T> struct Memcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

private:
  LIBC_INLINE static MemcmpReturnType block_offset(CPtr p1, CPtr p2,
                                                   size_t offset) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, offset))
        return cmp_neq<T>(p1, p2, offset);
      return MemcmpReturnType::zero();
    } else {
      return cmp<T>(p1, p2, offset);
    }
  }

public:
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    return block_offset(p1, p2, 0);
  }

  LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    return block_offset(p1, p2, count - SIZE);
  }

  LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
                                                size_t count) {
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (const auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    return tail(p1, p2, count);
  }

  LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                    size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE) {
        if constexpr (cmp_is_expensive<T>::value) {
          if (!eq<T>(p1, p2, offset))
            return cmp_neq<T>(p1, p2, offset);
        } else {
          if (const auto value = cmp<T>(p1, p2, offset))
            return value;
        }
      }
      return block_offset(p1, p2, limit); // tail
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (auto value = cmp<T>(p1, p2, offset))
          return value;
      return MemcmpReturnType::zero();
    }
  }

  LIBC_INLINE static MemcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset(), p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};

template <typename T, typename... TS> struct MemcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
    // TODO: test suggestion in
    // https://reviews.llvm.org/D148717?id=515724#inline-1446890
    // once we have a proper way to check memory operation latency.
    if constexpr (cmp_is_expensive<T>::value) {
      if (!eq<T>(p1, p2, 0))
        return cmp_neq<T>(p1, p2, 0);
    } else {
      if (auto value = cmp<T>(p1, p2, 0))
        return value;
    }
    if constexpr (sizeof...(TS) > 0)
      return MemcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return MemcmpReturnType::zero();
  }
};

///////////////////////////////////////////////////////////////////////////////
// Bcmp
///////////////////////////////////////////////////////////////////////////////
template <typename T> struct Bcmp {
  static_assert(is_element_type_v<T>);
  static constexpr size_t SIZE = sizeof(T);

  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    return neq<T>(p1, p2, 0);
  }

  LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
    const size_t tail_offset = count - SIZE;
    return neq<T>(p1, p2, tail_offset);
  }

  LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
    if (const auto value = neq<T>(p1, p2, 0))
      return value;
    return tail(p1, p2, count);
  }

  LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
                                                  size_t count) {
    return loop_and_tail_offset(p1, p2, count, 0);
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_offset(CPtr p1, CPtr p2, size_t count, size_t offset) {
    if constexpr (SIZE > 1) {
      const size_t limit = count - SIZE;
      LIBC_LOOP_NOUNROLL
      for (; offset < limit; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return tail(p1, p2, count);
    } else {
      // No need for a tail operation when SIZE == 1.
      LIBC_LOOP_NOUNROLL
      for (; offset < count; offset += SIZE)
        if (const auto value = neq<T>(p1, p2, offset))
          return value;
      return BcmpReturnType::zero();
    }
  }

  LIBC_INLINE static BcmpReturnType
  loop_and_tail_align_above(size_t threshold, CPtr p1, CPtr p2, size_t count) {
    static_assert(SIZE > 1,
                  "No need to align when processing one byte at a time");
    const AlignHelper<sizeof(T)> helper(p1);
    if (LIBC_UNLIKELY(count >= threshold) && helper.not_aligned()) {
      if (auto value = block(p1, p2))
        return value;
      adjust(helper.offset(), p1, p2, count);
    }
    return loop_and_tail(p1, p2, count);
  }
};

template <typename T, typename... TS> struct BcmpSequence {
  static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
  LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
    if (auto value = neq<T>(p1, p2, 0))
      return value;
    if constexpr (sizeof...(TS) > 0)
      return BcmpSequence<TS...>::block(p1 + sizeof(T), p2 + sizeof(T));
    else
      return BcmpReturnType::zero();
  }
};

///////////////////////////////////////////////////////////////////////////////
// Specializations for uint8_t
template <> struct cmp_is_expensive<uint8_t> : public cpp::false_type {};
template <> LIBC_INLINE bool eq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) == load<uint8_t>(p2, offset);
}
template <> LIBC_INLINE uint32_t neq<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return load<uint8_t>(p1, offset) ^ load<uint8_t>(p2, offset);
}
template <>
LIBC_INLINE MemcmpReturnType cmp<uint8_t>(CPtr p1, CPtr p2, size_t offset) {
  return static_cast<int32_t>(load<uint8_t>(p1, offset)) -
         static_cast<int32_t>(load<uint8_t>(p2, offset));
}
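
// Note (exposition only): widening both bytes to int32_t before subtracting
// keeps the result in [-255, 255], so the sign of the difference directly
// encodes the lexicographic order and the subtraction cannot overflow.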
template <>
LIBC_INLINE MemcmpReturnType cmp_neq<uint8_t>(CPtr p1, CPtr p2, size_t offset);

} // namespace LIBC_NAMESPACE::generic

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H