From da0c21bd4b62ee2a4a2709f49ea8b19538d1588a Mon Sep 17 00:00:00 2001
From: Adam Straw
Date: Sun, 22 Jun 2025 21:09:44 -0700
Subject: [PATCH] [mlir][gpu] Fix bug with GPU hardware intrinsic global location (#144923)

Bug description: Hardware intrinsic functions created during GPU conversion to NVVM may contain debug info metadata from the original function which cannot be used out of that function.
---
 mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h     | 7 ++++++-
 .../test/Conversion/GPUToNVVM/gpu-to-nvvm-debuginfo.mlir | 9 +++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index 34150c4d1308..64cf09e600b8 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -164,7 +164,12 @@ public:
     auto parentFunc = op->getParentOfType<FunctionOpInterface>();
     assert(parentFunc && "expected there to be a parent function");
     OpBuilder b(parentFunc);
-    return b.create<LLVMFuncOp>(op->getLoc(), funcName, funcType);
+
+    // Create a valid global location removing any metadata attached to the
+    // location as debug info metadata inside of a function cannot be used
+    // outside of that function.
+    auto globalloc = op->getLoc()->findInstanceOfOrUnknown<FileLineColLoc>();
+    return b.create<LLVMFuncOp>(globalloc, funcName, funcType);
   }
 
   StringRef getFunctionName(Type type, SourceOp op) const {
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-debuginfo.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-debuginfo.mlir
index 08c5800fe93b..5304abfb09a1 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-debuginfo.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-debuginfo.mlir
@@ -23,3 +23,12 @@ gpu.module @test_module_1 {
     gpu.return
   }
 }
+
+// Check that debug info metadata from the function is removed from the global location.
+gpu.module @test_module_2 {
+  // CHECK-DAG: llvm.func @__nv_abs(i32) -> i32 loc([[LOC]])
+  func.func @gpu_abs_with_loc(%arg_i32 : i32) -> (i32) {
+    %result32 = math.absi %arg_i32 : i32 loc(fused<#di_subprogram>[#loc])
+    func.return %result32 : i32
+  }
+}