[flang][cuda] Lower clock64 to nvvm intrinsic (#127155)

This commit is contained in:
Valentin Clement (バレンタイン クレメン)
2025-02-13 18:59:24 -08:00
committed by GitHub
parent dc79c66f2c
commit 910be4ff90
4 changed files with 22 additions and 0 deletions

View File

@@ -231,6 +231,7 @@ struct IntrinsicLibrary {
void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genCLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
/// Lowering entry point for the `clock64` intrinsic; takes the result type
/// and the already-lowered argument values and returns the produced value.
mlir::Value genClock64(mlir::Type, llvm::ArrayRef<mlir::Value>);
template <mlir::arith::CmpIPredicate pred>
fir::ExtendedValue genCPtrCompare(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);

View File

@@ -209,6 +209,7 @@ static constexpr IntrinsicHandler handlers[]{
&I::genChdir,
{{{"name", asAddr}, {"status", asAddr, handleDynamicOptional}}},
/*isElemental=*/false},
{"clock64", &I::genClock64, {}, /*isElemental=*/false},
{"cmplx",
&I::genCmplx,
{{{"x", asValue}, {"y", asValue, handleDynamicOptional}}}},
@@ -3228,6 +3229,16 @@ IntrinsicLibrary::genChdir(std::optional<mlir::Type> resultType,
return {};
}
// CLOCK64
// Emits a `fir.call` to the function named `llvm.nvvm.read.ptx.sreg.clock64`.
// The callee is created with the signature `() -> resultType`, and `args` is
// forwarded unchanged as the call operands. The first result of the call is
// returned.
mlir::Value IntrinsicLibrary::genClock64(mlir::Type resultType,
                                         llvm::ArrayRef<mlir::Value> args) {
  constexpr llvm::StringLiteral intrinsicName =
      "llvm.nvvm.read.ptx.sreg.clock64";
  // No operand types, one result type.
  mlir::FunctionType signature =
      mlir::FunctionType::get(builder.getContext(), {}, {resultType});
  auto callee = builder.createFunction(loc, intrinsicName, signature);
  auto call = builder.create<fir::CallOp>(loc, callee, args);
  return call.getResult(0);
}
// CMPLX
mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {

View File

@@ -628,5 +628,10 @@ implicit none
end interface
public :: atomicdec
! Explicit interface for the device function clock64: it takes no arguments
! and returns an integer(8) result. The name is exported from this module.
interface
attributes(device) integer(8) function clock64()
end function
end interface
public :: clock64
end module

View File

@@ -9,6 +9,7 @@ attributes(global) subroutine devsub()
real(8) :: ad
integer(4) :: ai
integer(8) :: al
integer(8) :: time
call syncthreads()
call syncwarp(1)
@@ -43,6 +44,8 @@ attributes(global) subroutine devsub()
ai = atomicor(ai, 1_4)
ai = atomicinc(ai, 1_4)
ai = atomicdec(ai, 1_4)
time = clock64()
end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -79,6 +82,8 @@ end
! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64()
subroutine host1()
integer, device :: a(32)
integer, device :: ret