[libc] Efficiently implement aligned_alloc for AMDGPU (#146585)

Summary:
This patch uses the actual allocator interface to implement
`aligned_alloc`. We do this by simply rounding up the amount allocated.
Because of how index calculation works, any offset within an allocated
pointer will still map to the same chunk, so we can just adjust the
returned pointer internally and `free` will still release it correctly.
This commit is contained in:
Joseph Huber
2025-07-02 09:25:57 -05:00
committed by GitHub
parent bca79ec0d2
commit 24828c8c45
5 changed files with 81 additions and 9 deletions

View File

@@ -32,6 +32,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
--blocks 1024
)
# Integration test for aligned_alloc, registered in the
# stdlib-gpu-integration-tests suite and built from aligned_alloc.cpp.
# LOADER_ARGS passes --threads 256 and --blocks 128 to the loader.
# NOTE(review): the hunk header suggests this sits inside the
# if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) block -- confirm.
add_integration_test(
aligned_alloc
SUITE
stdlib-gpu-integration-tests
SRCS
aligned_alloc.cpp
DEPENDS
libc.src.stdlib.aligned_alloc
libc.src.stdlib.malloc
libc.src.stdlib.free
LOADER_ARGS
--threads 256
--blocks 128
)
add_integration_test(
malloc_stress
SUITE

View File

@@ -0,0 +1,29 @@
#include "test/IntegrationTest/test.h"
#include "src/__support/GPU/utils.h"
#include "src/stdlib/aligned_alloc.h" // Adjust path if needed
#include "src/stdlib/free.h"
using namespace LIBC_NAMESPACE;
// Integration test entry point for aligned_alloc.
//
// Checks three things:
//   1. A valid (power-of-two) alignment yields a non-null pointer that is
//      actually aligned to the requested boundary.
//   2. A non-power-of-two alignment is rejected with a null pointer.
//   3. Requests whose alignment and size differ between lanes/threads each
//      yield a correctly aligned pointer that can be freed.
//
// Returns 0; failures are reported through the EXPECT_* macros.
TEST_MAIN(int, char **, char **) {
  // aligned_alloc with valid alignment and size
  void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
  EXPECT_NE(ptr, nullptr);
  // __builtin_is_aligned evaluates to true when the pointer is aligned, so
  // the expected value is true, not zero.
  EXPECT_EQ(__builtin_is_aligned(ptr, 32), true);

  LIBC_NAMESPACE::free(ptr);

  // aligned_alloc fails if alignment is not power of two
  void *bad_align = LIBC_NAMESPACE::aligned_alloc(30, 99);
  EXPECT_EQ(bad_align, nullptr);

  // aligned_alloc with a divergent alignment and size: the shift amount is
  // (lane id % 8 + 1), giving a power-of-two alignment from 2 up to 256,
  // and the requested size is (thread id + 1) * 4 bytes.
  size_t alignment = 1 << (__gpu_lane_id() % 8 + 1);
  void *div =
      LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
  EXPECT_NE(div, nullptr);
  EXPECT_EQ(__builtin_is_aligned(div, alignment), true);

  // Release the divergent allocation as well so the test does not leak and
  // the free path is exercised for every alignment used above.
  LIBC_NAMESPACE::free(div);

  return 0;
}