diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index dfa03782a442..00db4a8ae122 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -138,6 +138,11 @@ void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
     s[i] = c;
 }
 
+// Indicates that the provided value is a power of two.
+static inline constexpr bool is_pow2(uint64_t x) {
+  return x && (x & (x - 1)) == 0;
+}
+
 } // namespace impl
 
 /// A slab allocator used to hand out identically sized slabs of memory.
@@ -572,5 +577,27 @@ void *reallocate(void *ptr, uint64_t size) {
   return new_ptr;
 }
 
+void *aligned_allocate(uint32_t alignment, uint64_t size) {
+  // All alignment values must be a non-zero power of two.
+  if (!impl::is_pow2(alignment))
+    return nullptr;
+
+  // If the requested alignment is less than what we already provide this is
+  // just a normal allocation.
+  if (alignment < MIN_ALIGNMENT + 1)
+    return gpu::allocate(size);
+
+  // We can't handle alignments greater than 2MiB so we simply fail.
+  if (alignment > SLAB_ALIGNMENT + 1)
+    return nullptr;
+
+  // Trying to handle the alignment internally would break the assumption that
+  // each chunk is identical to each other. Allocate enough memory with
+  // worst-case alignment and then round up. The index logic will round down properly.
+  uint64_t rounded = size + alignment - 1;
+  void *ptr = gpu::allocate(rounded);
+  return __builtin_align_up(ptr, alignment);
+}
+
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/GPU/allocator.h b/libc/src/__support/GPU/allocator.h
index 757f3a406015..a7cf8bceef27 100644
--- a/libc/src/__support/GPU/allocator.h
+++ b/libc/src/__support/GPU/allocator.h
@@ -18,6 +18,7 @@ namespace gpu {
 void *allocate(uint64_t size);
 void deallocate(void *ptr);
 void *reallocate(void *ptr, uint64_t size);
+void *aligned_allocate(uint32_t alignment, uint64_t size);
 
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/gpu/aligned_alloc.cpp b/libc/src/stdlib/gpu/aligned_alloc.cpp
index cd2c7e55128f..34a7eae618fe 100644
--- a/libc/src/stdlib/gpu/aligned_alloc.cpp
+++ b/libc/src/stdlib/gpu/aligned_alloc.cpp
@@ -15,15 +15,15 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(void *, aligned_alloc, (size_t alignment, size_t size)) {
-  if ((alignment & -alignment) != alignment)
-    return nullptr;
-
-  void *ptr = gpu::allocate(size);
-  if ((reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) != 0) {
-    gpu::deallocate(ptr);
-    return nullptr;
-  }
-  return ptr;
+  // FIXME: NVIDIA targets currently use the built-in 'malloc' which we cannot
+  // reason with. But we still need to provide this function for compatibility.
+#ifndef LIBC_TARGET_ARCH_IS_NVPTX
+  return gpu::aligned_allocate(static_cast<uint32_t>(alignment), size);
+#else
+  (void)alignment;
+  (void)size;
+  return nullptr;
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
index 69e1909fe78e..5f9a215bb873 100644
--- a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
+++ b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
@@ -32,6 +32,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
       --blocks 1024
   )
 
+  add_integration_test(
+    aligned_alloc
+    SUITE
+      stdlib-gpu-integration-tests
+    SRCS
+      aligned_alloc.cpp
+    DEPENDS
+      libc.src.stdlib.aligned_alloc
+      libc.src.stdlib.malloc
+      libc.src.stdlib.free
+    LOADER_ARGS
+      --threads 256
+      --blocks 128
+  )
+
   add_integration_test(
     malloc_stress
     SUITE
diff --git a/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
new file mode 100644
index 000000000000..b966e6953cc2
--- /dev/null
+++ b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
@@ -0,0 +1,29 @@
+#include "test/IntegrationTest/test.h"
+
+#include "src/__support/GPU/utils.h"
+#include "src/stdlib/aligned_alloc.h"
+#include "src/stdlib/free.h"
+
+using namespace LIBC_NAMESPACE;
+
+TEST_MAIN(int, char **, char **) {
+  // aligned_alloc with valid alignment and size
+  void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
+  EXPECT_NE(ptr, nullptr);
+  EXPECT_TRUE(__builtin_is_aligned(ptr, 32));
+
+  LIBC_NAMESPACE::free(ptr);
+
+  // aligned_alloc fails if alignment is not power of two
+  void *bad_align = LIBC_NAMESPACE::aligned_alloc(30, 99);
+  EXPECT_EQ(bad_align, nullptr);
+
+  // aligned_alloc with a divergent size.
+  size_t alignment = 1 << (__gpu_lane_id() % 8 + 1);
+  void *div =
+      LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
+  EXPECT_NE(div, nullptr);
+  EXPECT_TRUE(__builtin_is_aligned(div, alignment));
+
+  return 0;
+}