[flang][cuda] Use NVVM op for barrier0 intrinsic (#140947)
The simple form of `Barrier0Op` is available in the NVVM dialect. It must be used instead of the string-named intrinsic call since https://github.com/llvm/llvm-project/pull/140615
This commit is contained in:
committed by
GitHub
parent
5ba57a81f2
commit
89d9a83b70
@@ -8332,12 +8332,7 @@ IntrinsicLibrary::genSum(mlir::Type resultType,
|
||||
|
||||
// SYNCTHREADS
|
||||
// Generates the lowering for the SYNCTHREADS intrinsic at the current
// builder insertion point. `args` is not read by any statement visible here.
//
// NOTE(review): this text is a flattened diff view, so the statements below
// appear to mix the pre-change lowering (a fir.call to a function named
// "llvm.nvvm.barrier0") with the post-change lowering (an NVVM Barrier0Op).
// Per the commit description, the NVVM op is presumably the only statement
// that remains after the change - confirm against the actual source file.
void IntrinsicLibrary::genSyncThreads(llvm::ArrayRef<fir::ExtendedValue> args) {
|
||||
// String-named lowering: declare a function with no inputs and no results...
constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0";
|
||||
mlir::FunctionType funcType =
|
||||
mlir::FunctionType::get(builder.getContext(), {}, {});
|
||||
auto funcOp = builder.createFunction(loc, funcName, funcType);
|
||||
// ...and call it with an empty operand list.
llvm::SmallVector<mlir::Value> noArgs;
|
||||
builder.create<fir::CallOp>(loc, funcOp, noArgs);
|
||||
// Dialect-op lowering: create the NVVM barrier0 operation directly.
builder.create<mlir::NVVM::Barrier0Op>(loc);
|
||||
}
|
||||
|
||||
// SYNCTHREADS_AND
|
||||
|
||||
@@ -49,7 +49,7 @@ attributes(global) subroutine devsub()
|
||||
end
|
||||
|
||||
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
|
||||
! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> ()
|
||||
! CHECK: nvvm.barrier0
|
||||
! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
|
||||
! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
|
||||
! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
|
||||
@@ -106,7 +106,7 @@ end
|
||||
|
||||
! CHECK-LABEL: func.func @_QPhost1()
|
||||
! CHECK: cuf.kernel
|
||||
! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> ()
|
||||
! CHECK: nvvm.barrier0
|
||||
! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath<contract> : (i32) -> ()
|
||||
! CHECK: fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
|
||||
! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32
|
||||
|
||||
Reference in New Issue
Block a user