[OMPIRBuilder][debug] Don't drop debug info for loop constructs. (#144393)

In OMPIRBuilder, we have many cases where we don't handle the debug location correctly while changing the location or insertion point. This is one of those cases. Please see the following test program. ``` program main implicit none integer i, j integer array(16384) !$omp target teams distribute DO i=1,16384 !$omp parallel do DO j=1,16384 array(j) = i ENDDO !$omp end parallel do ENDDO !$omp end target teams distribute print *, array end program main ``` When tried to compile with the follownig command `flang -g -O2 -fopenmp test.f90 -o test --offload-arch=gfx90a` will fail in the verification with the following errors: `!dbg attachment points at wrong subprogram for function` This happens because we were dropping the debug location in the createCanonicalLoop and the call to the functions like `__kmpc_distribute_static_4u` get generated without a debug location. When it gets inlined, the locations inside it are not adjusted as the call instruction does not have the debug locations (`llvm/lib/Transforms/Utils/InlineFunction.cpp:fixupLineNumbers`). Later Verifier finds that the caller have instructions with debug locations that point to another function and fails. The fix is simple to not drop the debug location.
2025-06-17 09:34:47 +01:00
parent 308b97a5d4
commit 2c90ebf3a7
2 changed files with 71 additions and 1 deletions
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4184,7 +4184,11 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
    Value *IndVar = Builder.CreateAdd(Span, Start);
    return BodyGenCB(Builder.saveIP(), IndVar);
  };
-  LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
+  LocationDescription LoopLoc =
+      ComputeIP.isSet()
+          ? Loc
+          : LocationDescription(Builder.saveIP(),
+                                Builder.getCurrentDebugLocation());
  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
 }

--- a/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
+  omp.private {type = private} @_QFEj_private_i32 : i32 loc(#loc1)
+  omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
+  llvm.func @test() {
+    %3 = llvm.mlir.constant(1 : i64) : i64
+    %4 = llvm.alloca %3 x i32 {bindc_name = "j"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
+    %6 = llvm.mlir.constant(1 : i64) : i64
+    %7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+    %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+    %9 = llvm.mlir.constant(16383 : index) : i64
+    %10 = llvm.mlir.constant(0 : index) : i64
+    %11 = llvm.mlir.constant(1 : index) : i64
+    %12 = llvm.mlir.constant(16384 : i32) : i32
+    %14 = llvm.mlir.addressof @_QFEarray : !llvm.ptr
+    %18 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
+    %20 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3)
+    %22 = omp.map.bounds lower_bound(%10 : i64) upper_bound(%9 : i64) extent(%9 : i64) stride(%11 : i64) start_idx(%11 : i64) loc(#loc3)
+    %23 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%22) -> !llvm.ptr {name = "array"} loc(#loc3)
+    %24 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
+    omp.target map_entries(%18 -> %arg0, %20 -> %arg2, %23 -> %arg4, %24 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      %25 = llvm.mlir.constant(1 : i32) : i32
+      %27 = llvm.mlir.constant(16384 : i32) : i32
+      omp.teams {
+        omp.distribute private(@_QFEi_private_i32 %arg5 -> %arg6 : !llvm.ptr) {
+          omp.loop_nest (%arg7) : i32 = (%25) to (%27) inclusive step (%25) {
+            omp.parallel {
+              omp.wsloop private(@_QFEj_private_i32 %arg2 -> %arg8 : !llvm.ptr) {
+                omp.loop_nest (%arg9) : i32 = (%25) to (%27) inclusive step (%25) {
+                  llvm.store %arg9, %arg8 : i32, !llvm.ptr loc(#loc9)
+                  omp.yield
+                } loc(#loc9)
+              } loc(#loc9)
+              omp.terminator loc(#loc9)
+            } loc(#loc9)
+            omp.yield loc(#loc9)
+          } loc(#loc9)
+        } loc(#loc9)
+        omp.terminator loc(#loc9)
+      } loc(#loc9)
+      omp.terminator loc(#loc9)
+    } loc(#loc9)
+    llvm.return loc(#loc9)
+  } loc(#loc14)
+  llvm.mlir.global internal @_QFEarray() {addr_space = 0 : i32} : !llvm.array<16384 x i32> {
+    %0 = llvm.mlir.zero : !llvm.array<16384 x i32>
+    llvm.return %0 : !llvm.array<16384 x i32>
+  } loc(#loc2)
+}
+#di_file = #llvm.di_file<"test.f90" in "">
+#di_null_type = #llvm.di_null_type
+#loc1 = loc("test.f90":4:23)
+#loc2 = loc("test.f90":4:15)
+#loc3 = loc("test.f90":1:7)
+#loc4 = loc("test.f90":4:18)
+#loc9 = loc("test.f90":13:11)
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", isOptimized = true, emissionKind = LineTablesOnly>
+#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program, types = #di_null_type>
+#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "main", file = #di_file, subprogramFlags = "Definition|Optimized|MainSubprogram", type = #di_subroutine_type>
+#loc14 = loc(fused<#di_subprogram>[#loc3])
+
+
+// CHECK: call void @__kmpc_distribute_static{{.*}}!dbg
+