[OMPIRBuilder][debug] Don't drop debug info for loop constructs. (#144393)

In OMPIRBuilder, we have many cases where we don't handle the debug
location correctly while changing the location or insertion point. This
is one of those cases.

Please see the following test program.
```
program main
  implicit none
  integer i, j
  integer array(16384)

!$omp target teams distribute
  DO i=1,16384
    !$omp parallel do
      DO j=1,16384
        array(j) = i
      ENDDO
    !$omp end parallel do
  ENDDO
!$omp end target teams distribute

print *, array
end program main
```

When compiled with the following command
`flang -g -O2 -fopenmp  test.f90 -o test  --offload-arch=gfx90a`

the compilation fails in the verifier with the following error: `!dbg
attachment points at wrong subprogram for function`

This happens because we were dropping the debug location in
createCanonicalLoop, so calls to functions like
`__kmpc_distribute_static_4u` were generated without a debug location.
When such a call gets inlined, the locations inside the callee are not
adjusted because the call instruction does not have a debug location
(`llvm/lib/Transforms/Utils/InlineFunction.cpp:fixupLineNumbers`). Later,
the Verifier finds that the caller has instructions with debug locations
that point to another function and fails.

The fix is simply to not drop the debug location.
This commit is contained in:
Abid Qadeer
2025-06-17 09:34:47 +01:00
committed by GitHub
parent 308b97a5d4
commit 2c90ebf3a7
2 changed files with 71 additions and 1 deletions

View File

@@ -4184,7 +4184,11 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
Value *IndVar = Builder.CreateAdd(Span, Start);
return BodyGenCB(Builder.saveIP(), IndVar);
};
LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
LocationDescription LoopLoc =
ComputeIP.isSet()
? Loc
: LocationDescription(Builder.saveIP(),
Builder.getCurrentDebugLocation());
return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}

View File

@@ -0,0 +1,66 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Translates an OpenMP target-device module (amdgcn-amd-amdhsa) to LLVM IR and
// verifies that the runtime call emitted for the distribute loop keeps a debug
// location. The function below models a `target teams distribute` loop that
// contains a nested `parallel` + worksharing loop.
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
// Privatizers for the two loop induction variables `j` and `i`.
omp.private {type = private} @_QFEj_private_i32 : i32 loc(#loc1)
omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
llvm.func @test() {
%3 = llvm.mlir.constant(1 : i64) : i64
%4 = llvm.alloca %3 x i32 {bindc_name = "j"} : (i64) -> !llvm.ptr<5> loc(#loc4)
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
%6 = llvm.mlir.constant(1 : i64) : i64
%7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc4)
%8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
%9 = llvm.mlir.constant(16383 : index) : i64
%10 = llvm.mlir.constant(0 : index) : i64
%11 = llvm.mlir.constant(1 : index) : i64
%12 = llvm.mlir.constant(16384 : i32) : i32
%14 = llvm.mlir.addressof @_QFEarray : !llvm.ptr
// Map entries for `i`, `j` and `array` that are passed to the target region.
%18 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
%20 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3)
%22 = omp.map.bounds lower_bound(%10 : i64) upper_bound(%9 : i64) extent(%9 : i64) stride(%11 : i64) start_idx(%11 : i64) loc(#loc3)
%23 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%22) -> !llvm.ptr {name = "array"} loc(#loc3)
%24 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
// target -> teams -> distribute (outer loop over %arg7) -> parallel -> wsloop
// (inner loop over %arg9). Both loops run from 1 to 16384 inclusive, step 1.
omp.target map_entries(%18 -> %arg0, %20 -> %arg2, %23 -> %arg4, %24 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%25 = llvm.mlir.constant(1 : i32) : i32
%27 = llvm.mlir.constant(16384 : i32) : i32
omp.teams {
omp.distribute private(@_QFEi_private_i32 %arg5 -> %arg6 : !llvm.ptr) {
omp.loop_nest (%arg7) : i32 = (%25) to (%27) inclusive step (%25) {
omp.parallel {
omp.wsloop private(@_QFEj_private_i32 %arg2 -> %arg8 : !llvm.ptr) {
omp.loop_nest (%arg9) : i32 = (%25) to (%27) inclusive step (%25) {
// Loop body: store the inner induction variable into the private copy of `j`.
llvm.store %arg9, %arg8 : i32, !llvm.ptr loc(#loc9)
omp.yield
} loc(#loc9)
} loc(#loc9)
omp.terminator loc(#loc9)
} loc(#loc9)
omp.yield loc(#loc9)
} loc(#loc9)
} loc(#loc9)
omp.terminator loc(#loc9)
} loc(#loc9)
omp.terminator loc(#loc9)
} loc(#loc9)
llvm.return loc(#loc9)
} loc(#loc14)
// Zero-initialized backing storage for `array`.
llvm.mlir.global internal @_QFEarray() {addr_space = 0 : i32} : !llvm.array<16384 x i32> {
%0 = llvm.mlir.zero : !llvm.array<16384 x i32>
llvm.return %0 : !llvm.array<16384 x i32>
} loc(#loc2)
}
// Debug-info attributes and source locations referenced by the loc(...)
// annotations in the module above. #loc14 fuses the "main" subprogram onto the
// function location so that the translated function gets a debug scope.
#di_file = #llvm.di_file<"test.f90" in "">
#di_null_type = #llvm.di_null_type
#loc1 = loc("test.f90":4:23)
#loc2 = loc("test.f90":4:15)
#loc3 = loc("test.f90":1:7)
#loc4 = loc("test.f90":4:18)
#loc9 = loc("test.f90":13:11)
#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", isOptimized = true, emissionKind = LineTablesOnly>
#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program, types = #di_null_type>
#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "main", file = #di_file, subprogramFlags = "Definition|Optimized|MainSubprogram", type = #di_subroutine_type>
#loc14 = loc(fused<#di_subprogram>[#loc3])
// The generated call to the __kmpc_distribute_static* runtime function must
// carry a !dbg attachment.
// CHECK: call void @__kmpc_distribute_static{{.*}}!dbg