[flang][cuda] Lower clock64 to nvvm intrinsic (#127155)
This commit is contained in:
committed by
GitHub
parent
dc79c66f2c
commit
910be4ff90
@@ -231,6 +231,7 @@ struct IntrinsicLibrary {
|
||||
void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
|
||||
fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
|
||||
fir::ExtendedValue genCLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
|
||||
mlir::Value genClock64(mlir::Type, llvm::ArrayRef<mlir::Value>);
|
||||
template <mlir::arith::CmpIPredicate pred>
|
||||
fir::ExtendedValue genCPtrCompare(mlir::Type,
|
||||
llvm::ArrayRef<fir::ExtendedValue>);
|
||||
|
||||
@@ -209,6 +209,7 @@ static constexpr IntrinsicHandler handlers[]{
|
||||
&I::genChdir,
|
||||
{{{"name", asAddr}, {"status", asAddr, handleDynamicOptional}}},
|
||||
/*isElemental=*/false},
|
||||
{"clock64", &I::genClock64, {}, /*isElemental=*/false},
|
||||
{"cmplx",
|
||||
&I::genCmplx,
|
||||
{{{"x", asValue}, {"y", asValue, handleDynamicOptional}}}},
|
||||
@@ -3228,6 +3229,16 @@ IntrinsicLibrary::genChdir(std::optional<mlir::Type> resultType,
|
||||
return {};
|
||||
}
|
||||
|
||||
// CLOCK64
|
||||
mlir::Value IntrinsicLibrary::genClock64(mlir::Type resultType,
|
||||
llvm::ArrayRef<mlir::Value> args) {
|
||||
constexpr llvm::StringLiteral funcName = "llvm.nvvm.read.ptx.sreg.clock64";
|
||||
mlir::MLIRContext *context = builder.getContext();
|
||||
mlir::FunctionType ftype = mlir::FunctionType::get(context, {}, {resultType});
|
||||
auto funcOp = builder.createFunction(loc, funcName, ftype);
|
||||
return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
|
||||
}
|
||||
|
||||
// CMPLX
|
||||
mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType,
|
||||
llvm::ArrayRef<mlir::Value> args) {
|
||||
|
||||
@@ -628,5 +628,10 @@ implicit none
|
||||
end interface
|
||||
public :: atomicdec
|
||||
|
||||
! Explicit interface for the device function clock64: it takes no arguments
! and returns an integer(8) value. The name is exported from this module.
interface
|
||||
attributes(device) integer(8) function clock64()
|
||||
end function
|
||||
end interface
|
||||
public :: clock64
|
||||
|
||||
end module
|
||||
|
||||
@@ -9,6 +9,7 @@ attributes(global) subroutine devsub()
|
||||
real(8) :: ad
|
||||
integer(4) :: ai
|
||||
integer(8) :: al
|
||||
integer(8) :: time
|
||||
|
||||
call syncthreads()
|
||||
call syncwarp(1)
|
||||
@@ -43,6 +44,8 @@ attributes(global) subroutine devsub()
|
||||
ai = atomicor(ai, 1_4)
|
||||
ai = atomicinc(ai, 1_4)
|
||||
ai = atomicdec(ai, 1_4)
|
||||
|
||||
time = clock64()
|
||||
end
|
||||
|
||||
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
|
||||
@@ -79,6 +82,8 @@ end
|
||||
! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
|
||||
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
|
||||
|
||||
! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64()
|
||||
|
||||
subroutine host1()
|
||||
integer, device :: a(32)
|
||||
integer, device :: ret
|
||||
|
||||
Reference in New Issue
Block a user