[OpenMP] Remove use of '__AMDGCN_WAVEFRONT_SIZE' (#113156)
Summary: This is going to be deprecated in https://github.com/llvm/llvm-project/pull/112849. This patch ports it to use the builtin instead. This isn't a compile constant, so it could slightly negatively affect codegen. There really should be an IR pass to turn it into a constant if the function has known attributes. Using the builtin is correct when we just do it for knowing the size like we do here. Obviously guarding w32/w64 code with this check would be broken.
This commit is contained in:
@@ -25,7 +25,6 @@ namespace ompx {
|
||||
namespace impl {
|
||||
|
||||
// Forward declarations defined to be defined for AMDGCN and NVPTX.
|
||||
const llvm::omp::GV &getGridValue();
|
||||
LaneMaskTy activemask();
|
||||
LaneMaskTy lanemaskLT();
|
||||
LaneMaskTy lanemaskGT();
|
||||
@@ -37,15 +36,14 @@ uint32_t getBlockIdInKernel(int32_t Dim);
|
||||
uint32_t getNumberOfBlocksInKernel(int32_t Dim);
|
||||
uint32_t getWarpIdInBlock();
|
||||
uint32_t getNumberOfWarpsInBlock();
|
||||
uint32_t getWarpSize();
|
||||
|
||||
/// AMDGCN Implementation
|
||||
///
|
||||
///{
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
|
||||
const llvm::omp::GV &getGridValue() {
|
||||
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
|
||||
}
|
||||
uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); }
|
||||
|
||||
uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
|
||||
switch (Dim) {
|
||||
@@ -152,7 +150,7 @@ uint32_t getNumberOfThreadsInBlock(int32_t Dim) {
|
||||
UNREACHABLE("Dim outside range!");
|
||||
}
|
||||
|
||||
const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
|
||||
uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); }
|
||||
|
||||
LaneMaskTy activemask() { return __nvvm_activemask(); }
|
||||
|
||||
@@ -219,8 +217,6 @@ uint32_t getNumberOfWarpsInBlock() {
|
||||
#pragma omp end declare variant
|
||||
///}
|
||||
|
||||
uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; }
|
||||
|
||||
} // namespace impl
|
||||
} // namespace ompx
|
||||
|
||||
|
||||
@@ -8,22 +8,33 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); }
|
||||
#pragma omp end declare variant
|
||||
|
||||
#pragma omp begin declare variant match(device = {arch(nvptx64)})
|
||||
unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); }
|
||||
#pragma omp end declare variant
|
||||
|
||||
#pragma omp begin declare variant match(device = {kind(cpu)})
|
||||
unsigned get_warp_size() { return 1; }
|
||||
#pragma omp end declare variant
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
const int num_blocks = 1;
|
||||
const int block_size = 256;
|
||||
const int N = num_blocks * block_size;
|
||||
int *res = (int *)malloc(N * sizeof(int));
|
||||
|
||||
#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \
|
||||
map(from: res[0:N])
|
||||
#pragma omp target teams ompx_bare num_teams(num_blocks) \
|
||||
thread_limit(block_size) map(from : res[0 : N])
|
||||
{
|
||||
int tid = ompx_thread_id_x();
|
||||
uint64_t mask = ompx_ballot_sync(~0LU, tid & 0x1);
|
||||
#if defined __AMDGCN_WAVEFRONT_SIZE && __AMDGCN_WAVEFRONT_SIZE == 64
|
||||
res[tid] = mask == 0xaaaaaaaaaaaaaaaa;
|
||||
#else
|
||||
res[tid] = mask == 0xaaaaaaaa;
|
||||
#endif
|
||||
if (get_warp_size() == 64)
|
||||
res[tid] = mask == 0xaaaaaaaaaaaaaaaa;
|
||||
else
|
||||
res[tid] = mask == 0xaaaaaaaa;
|
||||
}
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
|
||||
@@ -10,6 +10,18 @@
|
||||
#include <ompx.h>
|
||||
#include <type_traits>
|
||||
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); }
|
||||
#pragma omp end declare variant
|
||||
|
||||
#pragma omp begin declare variant match(device = {arch(nvptx64)})
|
||||
unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); }
|
||||
#pragma omp end declare variant
|
||||
|
||||
#pragma omp begin declare variant match(device = {kind(cpu)})
|
||||
unsigned get_warp_size() { return 1; }
|
||||
#pragma omp end declare variant
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
|
||||
bool equal(T LHS, T RHS) {
|
||||
return LHS == RHS;
|
||||
@@ -32,11 +44,7 @@ template <typename T> void test() {
|
||||
{
|
||||
int tid = ompx_thread_id_x();
|
||||
T val = ompx::shfl_down_sync(~0U, static_cast<T>(tid), 1);
|
||||
#ifdef __AMDGCN_WAVEFRONT_SIZE
|
||||
int warp_size = __AMDGCN_WAVEFRONT_SIZE;
|
||||
#else
|
||||
int warp_size = 32;
|
||||
#endif
|
||||
int warp_size = get_warp_size();
|
||||
if ((tid & (warp_size - 1)) != warp_size - 1)
|
||||
res[tid] = equal(val, static_cast<T>(tid + 1));
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user