diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index dfa03782a442..00db4a8ae122 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -138,6 +138,11 @@ void uniform_memset(uint32_t *s, uint32_t c, uint32_t n, uint64_t uniform) {
     s[i] = c;
 }
 
+// Indicates that the provided value is a power of two.
+static inline constexpr bool is_pow2(uint64_t x) {
+  return x && (x & (x - 1)) == 0;
+}
+
 } // namespace impl
 
 /// A slab allocator used to hand out identically sized slabs of memory.
@@ -572,5 +577,27 @@ void *reallocate(void *ptr, uint64_t size) {
   return new_ptr;
 }
 
+void *aligned_allocate(uint32_t alignment, uint64_t size) {
+  // All alignment values must be a non-zero power of two.
+  if (!impl::is_pow2(alignment))
+    return nullptr;
+
+  // If the requested alignment is less than what we already provide this is
+  // just a normal allocation.
+  if (alignment < MIN_ALIGNMENT + 1)
+    return gpu::allocate(size);
+
+  // We can't handle alignments greater than 2MiB so we simply fail.
+  if (alignment > SLAB_ALIGNMENT + 1)
+    return nullptr;
+
+  // Trying to handle the alignment internally would break the assumption that
+  // each chunk is identical to each other. Allocate enough memory with
+  // worst-case alignment and then round up. The index logic will round down properly.
+  uint64_t rounded = size + alignment - 1;
+  void *ptr = gpu::allocate(rounded);
+  return __builtin_align_up(ptr, alignment);
+}
+
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/GPU/allocator.h b/libc/src/__support/GPU/allocator.h
index 757f3a406015..a7cf8bceef27 100644
--- a/libc/src/__support/GPU/allocator.h
+++ b/libc/src/__support/GPU/allocator.h
@@ -18,6 +18,7 @@ namespace gpu {
 void *allocate(uint64_t size);
 void deallocate(void *ptr);
 void *reallocate(void *ptr, uint64_t size);
+void *aligned_allocate(uint32_t alignment, uint64_t size);
 
 } // namespace gpu
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/gpu/aligned_alloc.cpp b/libc/src/stdlib/gpu/aligned_alloc.cpp
index cd2c7e55128f..34a7eae618fe 100644
--- a/libc/src/stdlib/gpu/aligned_alloc.cpp
+++ b/libc/src/stdlib/gpu/aligned_alloc.cpp
@@ -15,15 +15,15 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(void *, aligned_alloc, (size_t alignment, size_t size)) {
-  if ((alignment & -alignment) != alignment)
-    return nullptr;
-
-  void *ptr = gpu::allocate(size);
-  if ((reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) != 0) {
-    gpu::deallocate(ptr);
-    return nullptr;
-  }
-  return ptr;
+  // FIXME: NVIDIA targets currently use the built-in 'malloc' which we cannot
+  // reason with. But we still need to provide this function for compatibility.
+#ifndef LIBC_TARGET_ARCH_IS_NVPTX
+  return gpu::aligned_allocate(static_cast<uint32_t>(alignment), size);
+#else
+  (void)alignment;
+  (void)size;
+  return nullptr;
+#endif
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
index 69e1909fe78e..5f9a215bb873 100644
--- a/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
+++ b/libc/test/integration/src/stdlib/gpu/CMakeLists.txt
@@ -32,6 +32,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
       --blocks 1024
   )
 
+  add_integration_test(
+    aligned_alloc
+    SUITE
+      stdlib-gpu-integration-tests
+    SRCS
+      aligned_alloc.cpp
+    DEPENDS
+      libc.src.stdlib.aligned_alloc
+      libc.src.stdlib.malloc
+      libc.src.stdlib.free
+    LOADER_ARGS
+      --threads 256
+      --blocks 128
+  )
+
   add_integration_test(
     malloc_stress
     SUITE
diff --git a/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
new file mode 100644
index 000000000000..b966e6953cc2
--- /dev/null
+++ b/libc/test/integration/src/stdlib/gpu/aligned_alloc.cpp
@@ -0,0 +1,29 @@
+#include "test/IntegrationTest/test.h"
+
+#include "src/__support/GPU/utils.h"
+#include "src/stdlib/aligned_alloc.h"
+#include "src/stdlib/free.h"
+
+using namespace LIBC_NAMESPACE;
+
+TEST_MAIN(int, char **, char **) {
+  // aligned_alloc with valid alignment and size
+  void *ptr = LIBC_NAMESPACE::aligned_alloc(32, 16);
+  EXPECT_NE(ptr, nullptr);
+  EXPECT_TRUE(__builtin_is_aligned(ptr, 32));
+
+  LIBC_NAMESPACE::free(ptr);
+
+  // aligned_alloc fails if alignment is not power of two
+  void *bad_align = LIBC_NAMESPACE::aligned_alloc(30, 99);
+  EXPECT_EQ(bad_align, nullptr);
+
+  // aligned_alloc with a divergent size.
+  size_t alignment = 1 << (__gpu_lane_id() % 8 + 1);
+  void *div =
+      LIBC_NAMESPACE::aligned_alloc(alignment, (gpu::get_thread_id() + 1) * 4);
+  EXPECT_NE(div, nullptr);
+  EXPECT_TRUE(__builtin_is_aligned(div, alignment));
+
+  return 0;
+}