Files
clang-p2996/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
Saiyedul Islam 160ff83765 [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 3
Provides AMDGCN and NVPTX specific specialization of getGPUWarpSize,
getGPUThreadID, and getGPUNumThreads methods. Adds tests for AMDGCN
codegen for these methods in generic and simd modes. Also changes the
precondition in InitTempAlloca to be slightly more permissive. Useful for
AMDGCN OpenMP codegen where allocas are created with a cast to an
address space.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D84260
2020-08-03 05:38:39 +00:00

62 lines
2.2 KiB
C++

//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation specialized to
// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntimeAMDGCN.h"
#include "CGOpenMPRuntimeGPU.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
: CGOpenMPRuntimeGPU(CGM) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP AMDGCN can only handle device code.");
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
// return constant compile-time target-specific warp size
unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
return Bld.getInt32(WarpSize);
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Function *F =
CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
return Bld.CreateCall(F, llvm::None, "nvptx_tid");
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Module *M = &CGF.CGM.getModule();
const char *LocSize = "__ockl_get_local_size";
llvm::Function *F = M->getFunction(LocSize);
if (!F) {
F = llvm::Function::Create(
llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false),
llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
}
return Bld.CreateTrunc(
Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty);
}