[OMPIRBuilder][debug] Don't drop debug info for loop constructs. (#144393)
In OMPIRBuilder, we have many cases where we don't handle the debug
location correctly while changing the location or insertion point. This
is one of those cases.
Please see the following test program.
```
program main
implicit none
integer i, j
integer array(16384)
!$omp target teams distribute
DO i=1,16384
!$omp parallel do
DO j=1,16384
array(j) = i
ENDDO
!$omp end parallel do
ENDDO
!$omp end target teams distribute
print *, array
end program main
```
When this program is compiled with the following command
`flang -g -O2 -fopenmp test.f90 -o test --offload-arch=gfx90a`
it fails in verification with the following error:
`!dbg attachment points at wrong subprogram for function`
This happens because we were dropping the debug location in
createCanonicalLoop, so calls to functions like
`__kmpc_distribute_static_4u` were generated without a debug location.
When such a call gets inlined, the locations inside the callee are not
adjusted because the call instruction does not have a debug location
(`llvm/lib/Transforms/Utils/InlineFunction.cpp:fixupLineNumbers`). Later, the
Verifier finds that the caller has instructions with debug locations
that point to another function and fails.
The fix is simply to not drop the debug location.
This commit is contained in:
@@ -4184,7 +4184,11 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
|
||||
Value *IndVar = Builder.CreateAdd(Span, Start);
|
||||
return BodyGenCB(Builder.saveIP(), IndVar);
|
||||
};
|
||||
LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
|
||||
LocationDescription LoopLoc =
|
||||
ComputeIP.isSet()
|
||||
? Loc
|
||||
: LocationDescription(Builder.saveIP(),
|
||||
Builder.getCurrentDebugLocation());
|
||||
return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
|
||||
}
|
||||
|
||||
|
||||
66
mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
Normal file
66
mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir
Normal file
@@ -0,0 +1,66 @@
|
||||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
|
||||
|
||||
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} {
|
||||
omp.private {type = private} @_QFEj_private_i32 : i32 loc(#loc1)
|
||||
omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
|
||||
llvm.func @test() {
|
||||
%3 = llvm.mlir.constant(1 : i64) : i64
|
||||
%4 = llvm.alloca %3 x i32 {bindc_name = "j"} : (i64) -> !llvm.ptr<5> loc(#loc4)
|
||||
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
|
||||
%6 = llvm.mlir.constant(1 : i64) : i64
|
||||
%7 = llvm.alloca %6 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc4)
|
||||
%8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
|
||||
%9 = llvm.mlir.constant(16383 : index) : i64
|
||||
%10 = llvm.mlir.constant(0 : index) : i64
|
||||
%11 = llvm.mlir.constant(1 : index) : i64
|
||||
%12 = llvm.mlir.constant(16384 : i32) : i32
|
||||
%14 = llvm.mlir.addressof @_QFEarray : !llvm.ptr
|
||||
%18 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
|
||||
%20 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3)
|
||||
%22 = omp.map.bounds lower_bound(%10 : i64) upper_bound(%9 : i64) extent(%9 : i64) stride(%11 : i64) start_idx(%11 : i64) loc(#loc3)
|
||||
%23 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%22) -> !llvm.ptr {name = "array"} loc(#loc3)
|
||||
%24 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3)
|
||||
omp.target map_entries(%18 -> %arg0, %20 -> %arg2, %23 -> %arg4, %24 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
|
||||
%25 = llvm.mlir.constant(1 : i32) : i32
|
||||
%27 = llvm.mlir.constant(16384 : i32) : i32
|
||||
omp.teams {
|
||||
omp.distribute private(@_QFEi_private_i32 %arg5 -> %arg6 : !llvm.ptr) {
|
||||
omp.loop_nest (%arg7) : i32 = (%25) to (%27) inclusive step (%25) {
|
||||
omp.parallel {
|
||||
omp.wsloop private(@_QFEj_private_i32 %arg2 -> %arg8 : !llvm.ptr) {
|
||||
omp.loop_nest (%arg9) : i32 = (%25) to (%27) inclusive step (%25) {
|
||||
llvm.store %arg9, %arg8 : i32, !llvm.ptr loc(#loc9)
|
||||
omp.yield
|
||||
} loc(#loc9)
|
||||
} loc(#loc9)
|
||||
omp.terminator loc(#loc9)
|
||||
} loc(#loc9)
|
||||
omp.yield loc(#loc9)
|
||||
} loc(#loc9)
|
||||
} loc(#loc9)
|
||||
omp.terminator loc(#loc9)
|
||||
} loc(#loc9)
|
||||
omp.terminator loc(#loc9)
|
||||
} loc(#loc9)
|
||||
llvm.return loc(#loc9)
|
||||
} loc(#loc14)
|
||||
llvm.mlir.global internal @_QFEarray() {addr_space = 0 : i32} : !llvm.array<16384 x i32> {
|
||||
%0 = llvm.mlir.zero : !llvm.array<16384 x i32>
|
||||
llvm.return %0 : !llvm.array<16384 x i32>
|
||||
} loc(#loc2)
|
||||
}
|
||||
#di_file = #llvm.di_file<"test.f90" in "">
|
||||
#di_null_type = #llvm.di_null_type
|
||||
#loc1 = loc("test.f90":4:23)
|
||||
#loc2 = loc("test.f90":4:15)
|
||||
#loc3 = loc("test.f90":1:7)
|
||||
#loc4 = loc("test.f90":4:18)
|
||||
#loc9 = loc("test.f90":13:11)
|
||||
#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", isOptimized = true, emissionKind = LineTablesOnly>
|
||||
#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program, types = #di_null_type>
|
||||
#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "main", file = #di_file, subprogramFlags = "Definition|Optimized|MainSubprogram", type = #di_subroutine_type>
|
||||
#loc14 = loc(fused<#di_subprogram>[#loc3])
|
||||
|
||||
|
||||
// CHECK: call void @__kmpc_distribute_static{{.*}}!dbg
|
||||
|
||||
Reference in New Issue
Block a user