Provides AMDGCN- and NVPTX-specific specializations of the getGPUWarpSize, getGPUThreadID, and getGPUNumThreads methods. Adds tests for AMDGCN codegen of these methods in generic and SIMD modes. Also changes the precondition in InitTempAlloca to be slightly more permissive, which is useful for AMDGCN OpenMP codegen, where allocas are created with a cast to an address space. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D84260
62 lines
2.2 KiB
C++
62 lines
2.2 KiB
C++
//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This provides a class for OpenMP runtime code generation specialized to
|
|
// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "CGOpenMPRuntimeAMDGCN.h"
|
|
#include "CGOpenMPRuntimeGPU.h"
|
|
#include "CodeGenFunction.h"
|
|
#include "clang/AST/Attr.h"
|
|
#include "clang/AST/DeclOpenMP.h"
|
|
#include "clang/AST/StmtOpenMP.h"
|
|
#include "clang/AST/StmtVisitor.h"
|
|
#include "clang/Basic/Cuda.h"
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
|
|
|
using namespace clang;
|
|
using namespace CodeGen;
|
|
using namespace llvm::omp;
|
|
|
|
/// Constructs the AMDGCN runtime on top of the generalized
/// CGOpenMPRuntimeGPU base class.
///
/// \param CGM Code generation module forwarded to the base class; its
/// language options are inspected for the OpenMPIsDevice flag.
///
/// Reaching the end of the constructor without OpenMPIsDevice being set is
/// treated as unreachable.
CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
    : CGOpenMPRuntimeGPU(CGM) {
  const bool IsDeviceCompilation = CGM.getLangOpts().OpenMPIsDevice;
  if (IsDeviceCompilation)
    return;
  llvm_unreachable("OpenMP AMDGCN can only handle device code.");
}
|
|
|
|
/// Produces the warp size as a 32-bit integer constant.
///
/// The value is read from the target's GV_Warp_Size grid value at compile
/// time, so only a constant is handed back to the caller.
///
/// \param CGF Function being emitted; supplies the target info and builder.
/// \return The i32 constant holding the warp size.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
  const unsigned TargetWarpSize =
      CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
  return CGF.Builder.getInt32(TargetWarpSize);
}
|
|
|
|
/// Emits a call to the amdgcn_workitem_id_x intrinsic and returns its result.
///
/// \param CGF Function being emitted; supplies the module and builder.
/// \return The value produced by the intrinsic call.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
  llvm::Function *WorkItemIdX =
      CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
  // The intrinsic takes no arguments. The result keeps the "nvptx_tid" value
  // name even though this is the AMDGCN runtime.
  return CGF.Builder.CreateCall(WorkItemIdX, llvm::None, "nvptx_tid");
}
|
|
|
|
/// Emits a call to __ockl_get_local_size(0) and returns the result truncated
/// to 32 bits.
///
/// If the module does not yet contain a function with that name, an external
/// declaration of type i64(i32) is added first.
///
/// \param CGF Function being emitted; supplies the module, types and builder.
/// \return The i32 value obtained by truncating the i64 call result.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
  llvm::Module &Mod = CGF.CGM.getModule();
  const char *LocalSizeName = "__ockl_get_local_size";

  llvm::Function *LocalSizeFn = Mod.getFunction(LocalSizeName);
  if (LocalSizeFn == nullptr) {
    // Not declared in this module yet: add an external declaration.
    llvm::FunctionType *LocalSizeTy = llvm::FunctionType::get(
        CGF.Int64Ty, {CGF.Int32Ty}, /*isVarArg=*/false);
    LocalSizeFn = llvm::Function::Create(
        LocalSizeTy, llvm::Function::ExternalLinkage, LocalSizeName, &Mod);
  }

  CGBuilderTy &Bld = CGF.Builder;
  // Argument 0 presumably selects the x dimension -- TODO confirm against the
  // device library's definition of __ockl_get_local_size.
  llvm::Value *LocalSize =
      Bld.CreateCall(LocalSizeFn, {Bld.getInt32(0)}, "nvptx_num_threads");
  // The callee returns i64; callers of this method receive an i32.
  return Bld.CreateTrunc(LocalSize, CGF.Int32Ty);
}
|