[NFC] Replace enum indices into an array with a struct. The fields are named to match the enum; memory layout and initialization are left unchanged. The motivation is to later safely remove dead fields and replace redundant ones with compile-time computation. It should also be possible to factor some common fields into a base and introduce a gfx10 amdgpu instance with less duplication than the arrays of integers require. Reviewed By: ronlieb Differential Revision: https://reviews.llvm.org/D108339
62 lines
2.2 KiB
C++
62 lines
2.2 KiB
C++
//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This provides a class for OpenMP runtime code generation specialized to
|
|
// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "CGOpenMPRuntimeAMDGCN.h"
|
|
#include "CGOpenMPRuntimeGPU.h"
|
|
#include "CodeGenFunction.h"
|
|
#include "clang/AST/Attr.h"
|
|
#include "clang/AST/DeclOpenMP.h"
|
|
#include "clang/AST/StmtOpenMP.h"
|
|
#include "clang/AST/StmtVisitor.h"
|
|
#include "clang/Basic/Cuda.h"
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
|
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
|
|
|
using namespace clang;
|
|
using namespace CodeGen;
|
|
using namespace llvm::omp;
|
|
|
|
/// Builds the AMDGCN OpenMP runtime by forwarding \p CGM to the generic
/// CGOpenMPRuntimeGPU base. Reaching this constructor when the language
/// options do not have OpenMPIsDevice set is treated as unreachable.
CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
    : CGOpenMPRuntimeGPU(CGM) {
  const bool IsDeviceCompilation = CGM.getLangOpts().OpenMPIsDevice;
  if (IsDeviceCompilation)
    return;

  llvm_unreachable("OpenMP AMDGCN can only handle device code.");
}
|
|
|
|
/// Returns the warp size as a 32-bit integer constant.
///
/// The value is read from the GV_Warp_Size field of the target's grid values,
/// so it is a compile-time, target-specific constant rather than something
/// queried at run time.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
  const unsigned TargetWarpSize =
      CGF.getTarget().getGridValue().GV_Warp_Size;
  return CGF.Builder.getInt32(TargetWarpSize);
}
|
|
|
|
/// Emits a call to the amdgcn_workitem_id_x intrinsic at the current insertion
/// point of \p CGF's builder and returns the resulting value.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
  llvm::Function *WorkitemIdX =
      CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
  // The "nvptx_tid" value name is kept as-is; it only labels the emitted call.
  return CGF.Builder.CreateCall(WorkitemIdX, llvm::None, "nvptx_tid");
}
|
|
|
|
/// Emits a call to the "__kmpc_amdgcn_gpu_num_threads" function and returns
/// the call's result.
///
/// If the current module does not already contain a function with that name,
/// one is created with external linkage and the signature `i32()`
/// (no parameters, not variadic) before the call is emitted.
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
  llvm::Module *TheModule = &CGF.CGM.getModule();
  const char *RuntimeFnName = "__kmpc_amdgcn_gpu_num_threads";

  llvm::Function *NumThreadsFn = TheModule->getFunction(RuntimeFnName);
  if (NumThreadsFn == nullptr) {
    // Not present in the module yet: add it so the call below has a callee.
    llvm::FunctionType *NumThreadsFnTy =
        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, /*isVarArg=*/false);
    NumThreadsFn = llvm::Function::Create(
        NumThreadsFnTy, llvm::GlobalVariable::ExternalLinkage, RuntimeFnName,
        TheModule);
  }

  return CGF.Builder.CreateCall(NumThreadsFn, llvm::None, "nvptx_num_threads");
}
|