[libc][NFC] Remove template from GPU allocator reference counter

Summary:
We don't need this to be generic; this is a precommit for
https://github.com/llvm/llvm-project/pull/143607
Author: Joseph Huber
Date:   2025-06-11 11:37:12 -05:00
Parent: 459475020a
Commit: f1575de4c5

@@ -283,7 +283,7 @@ struct Slab {
 
 /// A wait-free guard around a pointer resource to be created dynamically if
 /// space is available and freed once there are no more users.
-template <typename T> struct GuardPtr {
+struct GuardPtr {
 private:
   struct RefCounter {
     // Indicates that the object is in its deallocation phase and thus invalid.
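For context on the pattern: RefCounter folds an "object is being torn down" flag into the count itself, so a single atomic operation both takes references and detects invalidation. A minimal standalone sketch of that idea (std::atomic stands in for cpp::Atomic; the acquire/release semantics here are simplified assumptions, not the libc code):

```cpp
#include <atomic>
#include <cstdint>

// Sketch only: the top bit of a 64-bit atomic doubles as an
// "in deallocation, no longer valid" flag, so validity and the user
// count are read and updated in one atomic operation.
struct RefCounterSketch {
  static constexpr uint64_t INVALID = uint64_t(1) << 63;
  std::atomic<uint64_t> counter{0};

  // Take n references; fails if the object is already being torn down.
  // (Simplified: a full implementation must also undo the increment on
  // failure and report the observed count back to the caller.)
  bool acquire(uint32_t n) {
    uint64_t old = counter.fetch_add(n, std::memory_order_relaxed);
    return (old & INVALID) == 0;
  }

  // Drop n references; returns true when this call released the last
  // reference, making the caller responsible for destruction.
  bool release(uint32_t n) {
    return counter.fetch_sub(n, std::memory_order_acq_rel) == n;
  }
};
```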
@@ -339,22 +339,22 @@ private:
     cpp::Atomic<uint64_t> counter{0};
   };
 
-  cpp::Atomic<T *> ptr{nullptr};
+  cpp::Atomic<Slab *> ptr{nullptr};
   RefCounter ref{};
 
   // Should be called by a single lane for each different pointer.
   template <typename... Args>
-  T *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
-    T *expected = ptr.load(cpp::MemoryOrder::RELAXED);
+  Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
+    Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED);
     if (!expected &&
-        ptr.compare_exchange_strong(expected, reinterpret_cast<T *>(SENTINEL),
-                                    cpp::MemoryOrder::RELAXED,
-                                    cpp::MemoryOrder::RELAXED)) {
+        ptr.compare_exchange_strong(
+            expected, reinterpret_cast<Slab *>(SENTINEL),
+            cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
       count = cpp::numeric_limits<uint64_t>::max();
-      void *raw = impl::rpc_allocate(sizeof(T));
+      void *raw = impl::rpc_allocate(sizeof(Slab));
       if (!raw)
         return nullptr;
-      T *mem = new (raw) T(cpp::forward<Args>(args)...);
+      Slab *mem = new (raw) Slab(cpp::forward<Args>(args)...);
 
       cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
       ptr.store(mem, cpp::MemoryOrder::RELAXED);
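This hunk is the creation path: the first caller to CAS the null pointer to SENTINEL wins the right to construct the Slab, publishes it with a release fence, and records count = max() so it knows it holds the creating reference; everyone else backs off without waiting. A self-contained sketch of the same claim-then-publish shape (std::atomic, malloc-backed stand-ins for impl::rpc_allocate/rpc_free, and an all-ones SENTINEL are assumptions, not the libc definitions):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <new>

struct Resource {}; // stand-in for Slab

// Stand-ins for impl::rpc_allocate / impl::rpc_free (assumed shape only).
void *rpc_allocate(std::size_t size) { return std::malloc(size); }
void rpc_free(void *ptr) { std::free(ptr); }

std::atomic<Resource *> guarded{nullptr};
Resource *const SENTINEL = reinterpret_cast<Resource *>(~std::uintptr_t(0));

// Claim-then-publish: the first thread to CAS nullptr -> SENTINEL owns
// construction; every thread that observes nullptr or SENTINEL returns
// immediately instead of waiting on the winner.
Resource *try_create() {
  Resource *expected = guarded.load(std::memory_order_relaxed);
  if (!expected &&
      guarded.compare_exchange_strong(expected, SENTINEL,
                                      std::memory_order_relaxed,
                                      std::memory_order_relaxed)) {
    void *raw = rpc_allocate(sizeof(Resource));
    if (!raw)
      return nullptr; // simplified: the sentinel should be reset here
    Resource *mem = new (raw) Resource();
    // Release fence so construction is visible before the real pointer
    // is published over the sentinel.
    std::atomic_thread_fence(std::memory_order_release);
    guarded.store(mem, std::memory_order_relaxed);
    return mem;
  }
  if (!expected || expected == SENTINEL)
    return nullptr; // construction in flight elsewhere; back off
  return expected;  // already published
}
```

Backing off instead of spinning on the sentinel is what keeps the guard wait-free: no lane ever loops waiting on another lane's progress.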
@@ -364,7 +364,7 @@ private:
       return mem;
     }
 
-    if (!expected || expected == reinterpret_cast<T *>(SENTINEL))
+    if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL))
      return nullptr;
 
     if (!ref.acquire(n, count))
@@ -379,10 +379,10 @@ public:
   // The uniform mask represents which lanes share the same pointer. For each
   // uniform value we elect a leader to handle it on behalf of the other lanes.
   template <typename... Args>
-  T *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
-              Args &&...args) {
+  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
+                 Args &&...args) {
     count = 0;
-    T *result = nullptr;
+    Slab *result = nullptr;
     if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
       result = try_lock_impl(cpp::popcount(uniform), count,
                              cpp::forward<Args>(args)...);
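try_lock coalesces the group's reference count: cpp::countr_zero(uniform) elects the lowest lane sharing the pointer as leader, and cpp::popcount(uniform) is the number of references it acquires on the group's behalf, so one atomic covers every lane. A host-side illustration of the same bit arithmetic using C++20 <bit> (the mask value is invented):

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical: lanes 0, 3, 9, and 12 of a warp all chose the same slab.
  uint64_t uniform = (1ULL << 0) | (1ULL << 3) | (1ULL << 9) | (1ULL << 12);

  // The lowest set lane is elected leader; it acquires one reference per
  // lane it represents, so the group needs one atomic instead of four.
  int leader = std::countr_zero(uniform); // 0
  int refs = std::popcount(uniform);      // 4

  std::printf("lane %d acquires %d references for the group\n", leader, refs);
  return 0;
}
```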
@@ -403,8 +403,8 @@ public:
     cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
     if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(mask)) &&
         ref.release(cpp::popcount(mask))) {
-      T *p = ptr.load(cpp::MemoryOrder::RELAXED);
-      p->~T();
+      Slab *p = ptr.load(cpp::MemoryOrder::RELAXED);
+      p->~Slab();
       impl::rpc_free(p);
       cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
       ptr.store(nullptr, cpp::MemoryOrder::RELAXED);
@@ -417,7 +417,7 @@ public:
 };
 
 // The global array used to search for a valid slab to allocate from.
-static GuardPtr<Slab> slots[ARRAY_SIZE] = {};
+static GuardPtr slots[ARRAY_SIZE] = {};
 
 // Tries to find a slab in the table that can support the given chunk size.
 static Slab *find_slab(uint32_t chunk_size) {
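With the template gone, the table is declared without an argument: Slab was the only type GuardPtr was ever instantiated with, so the parameter carried no information. For the lookup the comments describe, here is a hypothetical sketch of the scan shape (ARRAY_SIZE's value, Slab's interface, and the bare-pointer table are illustrative assumptions; the real find_slab presumably goes through GuardPtr::try_lock rather than raw pointers):

```cpp
#include <cstdint>

// Hypothetical stand-in: the real Slab lives in allocator.cpp and is
// reference-counted through GuardPtr rather than held as a bare pointer.
struct Slab {
  uint32_t chunk_size = 0;
};

constexpr int ARRAY_SIZE = 128; // assumption; the real constant is in allocator.cpp

// Sketch of the scan described by the comments above: walk the fixed table
// and return the first slab that serves the requested chunk size.
Slab *find_slab_sketch(Slab *(&slots)[ARRAY_SIZE], uint32_t chunk_size) {
  for (int i = 0; i < ARRAY_SIZE; ++i)
    if (slots[i] && slots[i]->chunk_size == chunk_size)
      return slots[i];
  return nullptr;
}
```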