From 89d9a83b704a8f6b5bd64dac93095a9228c601d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Wed, 21 May 2025 13:05:14 -0700 Subject: [PATCH] [flang][cuda] Use NVVM op for barrier0 intrinsic (#140947) The simple form of `Barrier0Op` is available in the NVVM dialect. It needs to be used instead of the string version since https://github.com/llvm/llvm-project/pull/140615 --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 7 +------ flang/test/Lower/CUDA/cuda-device-proc.cuf | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 1ac0627da952..178b6770d6b5 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -8332,12 +8332,7 @@ IntrinsicLibrary::genSum(mlir::Type resultType, // SYNCTHREADS void IntrinsicLibrary::genSyncThreads(llvm::ArrayRef args) { - constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0"; - mlir::FunctionType funcType = - mlir::FunctionType::get(builder.getContext(), {}, {}); - auto funcOp = builder.createFunction(loc, funcName, funcType); - llvm::SmallVector noArgs; - builder.create(loc, funcOp, noArgs); + builder.create(loc); } // SYNCTHREADS_AND diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 8f5e6dd36da4..42ee7657966e 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -49,7 +49,7 @@ attributes(global) subroutine devsub() end ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} -! CHECK: fir.call @llvm.nvvm.barrier0() fastmath : () -> () +! CHECK: nvvm.barrier0 ! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath : (i32) -> () ! 
CHECK: fir.call @llvm.nvvm.membar.gl() fastmath : () -> () ! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath : () -> () @@ -106,7 +106,7 @@ end ! CHECK-LABEL: func.func @_QPhost1() ! CHECK: cuf.kernel -! CHECK: fir.call @llvm.nvvm.barrier0() fastmath : () -> () +! CHECK: nvvm.barrier0 ! CHECK: fir.call @llvm.nvvm.bar.warp.sync(%c1{{.*}}) fastmath : (i32) -> () ! CHECK: fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath : (i32) -> i32 ! CHECK: fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath : (i32) -> i32