From 8231dd71cb7dce489f4499a4e4f0ec149e858087 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 25 Jun 2025 07:10:08 -0500 Subject: [PATCH] [flang][OpenMP] Skip runtime mapping with no offload targets (#145594) When no offload targets are specified flang will avoid offloading for "target" constructs, but not "target data" constructs. This patch makes the behavior consistent across all offload-related operations. While ignoring "target" may produce semantically incorrect code, it may still be a useful debugging tool. -- This reinstates commits 6ba1955 and 349f8d6, reverted due to compilation failures in the gfortran test suite. These build problems were caused by an unrelated issue (https://github.com/llvm/llvm-project/issues/145558) which is now fixed. Ref: https://github.com/llvm/llvm-project/pull/144534 --- .../OpenMP/target-data-skip-mapper-calls.f90 | 30 ++ .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 6 + mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 352 +++++++++--------- .../LLVMIR/omptargetdata-nowait-llvm.mlir | 42 ++- .../LLVMIR/openmp-data-target-device.mlir | 2 +- 5 files changed, 247 insertions(+), 185 deletions(-) create mode 100644 flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 diff --git a/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 b/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 new file mode 100644 index 000000000000..f1a150d5dfab --- /dev/null +++ b/flang/test/Lower/OpenMP/target-data-skip-mapper-calls.f90 @@ -0,0 +1,30 @@ +!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s --check-prefix=NORT +!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s --check-prefix=LLVM + +!Make sure that there are no calls to the mapper. +!NORT-NOT: call{{.*}}__tgt_target_data_begin_mapper +!NORT-NOT: call{{.*}}__tgt_target_data_end_mapper + +!Make sure we generate the body +!LLVM: define internal void @_QFPf(ptr %[[A0:[0-9]+]], ptr %[[A1:[0-9]+]]) +!LLVM: %[[V0:[0-9]+]] = load i32, ptr %[[A0]], align 4 +!LLVM: %[[V1:[0-9]+]] = load i32, ptr %[[A1]], align 4 +!LLVM: %[[V2:[0-9]+]] = add i32 %[[V0]], %[[V1]] +!LLVM: store i32 %[[V2]], ptr %[[A0]], align 4 +!LLVM: ret void +!LLVM: } + + +program test + +call f(1, 2) + +contains + +subroutine f(x, y) + integer :: x, y + !$omp target data map(tofrom: x, y) + x = x + y + !$omp end target data +end subroutine +end diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 9272f6572fda..5441e0ab8f7f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -4378,6 +4378,9 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::TargetDataInfo info(/*RequiresDevicePointerInfo=*/true, /*SeparateBeginEndCalls=*/true); + bool isTargetDevice = ompBuilder->Config.isTargetDevice(); + bool isOffloadEntry = + isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); LogicalResult result = llvm::TypeSwitch(op) @@ -4467,6 +4470,9 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, if (failed(result)) return failure(); + // Pretend we have IF(false) if we're not doing offload. + if (!isOffloadEntry) + ifCond = builder.getFalse(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; MapInfoData mapData; diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 971bea206854..e6ea3aaeec65 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -1,15 +1,17 @@ // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s -llvm.func @_QPopenmp_target_data() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(99 : i32) : i32 - llvm.store %3, %1 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(99 : i32) : i32 + llvm.store %3, %1 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] @@ -38,23 +40,25 @@ llvm.func @_QPopenmp_target_data() { // ----- -llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { - %1 = llvm.mlir.constant(1023 : index) : i64 - %2 = llvm.mlir.constant(0 : index) : i64 - %3 = llvm.mlir.constant(1024 : index) : i64 - %4 = llvm.mlir.constant(1 : index) : i64 - %5 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64) - %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%6 : !llvm.ptr) { - %7 = llvm.mlir.constant(99 : i32) : i32 - %8 = llvm.mlir.constant(1 : i64) : i64 - %9 = llvm.mlir.constant(1 : i64) : i64 - %10 = llvm.mlir.constant(0 : i64) : i64 - %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> - llvm.store %7, %11 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { + %1 = llvm.mlir.constant(1023 : index) : i64 + %2 = llvm.mlir.constant(0 : index) : i64 + %3 = llvm.mlir.constant(1024 : index) : i64 + %4 = llvm.mlir.constant(1 : index) : i64 + %5 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%3 : i64) stride(%4 : i64) start_idx(%4 : i64) + %6 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%6 : !llvm.ptr) { + %7 = llvm.mlir.constant(99 : i32) : i32 + %8 = llvm.mlir.constant(1 : i64) : i64 + %9 = llvm.mlir.constant(1 : i64) : i64 + %10 = llvm.mlir.constant(0 : i64) : i64 + %11 = llvm.getelementptr %0[0, %10] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<1024 x i32> + llvm.store %7, %11 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4096] @@ -85,50 +89,52 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { // ----- -llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { - %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr - %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr - %8 = llvm.mlir.constant(5 : i32) : i32 - llvm.store %8, %7 : i32, !llvm.ptr - %9 = llvm.mlir.constant(2 : i32) : i32 - llvm.store %9, %5 : i32, !llvm.ptr - %10 = llvm.load %7 : !llvm.ptr -> i32 - %11 = llvm.mlir.constant(10 : i32) : i32 - %12 = llvm.icmp "slt" %10, %11 : i32 - %13 = llvm.load %5 : !llvm.ptr -> i32 - %14 = llvm.mlir.constant(1023 : index) : i64 - %15 = llvm.mlir.constant(0 : index) : i64 - %16 = llvm.mlir.constant(1024 : index) : i64 - %17 = llvm.mlir.constant(1 : index) : i64 - %18 = omp.map.bounds lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64) - %map1 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""} - %19 = llvm.mlir.constant(511 : index) : i64 - %20 = llvm.mlir.constant(0 : index) : i64 - %21 = llvm.mlir.constant(512 : index) : i64 - %22 = llvm.mlir.constant(1 : index) : i64 - %23 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64) - %map2 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""} - omp.target_enter_data if(%12) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr) - %24 = llvm.load %7 : !llvm.ptr -> i32 - %25 = llvm.mlir.constant(10 : i32) : i32 - %26 = llvm.icmp "sgt" %24, %25 : i32 - %27 = llvm.load %5 : !llvm.ptr -> i32 - %28 = llvm.mlir.constant(1023 : index) : i64 - %29 = llvm.mlir.constant(0 : index) : i64 - %30 = llvm.mlir.constant(1024 : index) : i64 - %31 = llvm.mlir.constant(1 : index) : i64 - %32 = omp.map.bounds lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64) - %map3 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""} - %33 = llvm.mlir.constant(511 : index) : i64 - %34 = llvm.mlir.constant(0 : index) : i64 - %35 = llvm.mlir.constant(512 : index) : i64 - %36 = llvm.mlir.constant(1 : index) : i64 - %37 = omp.map.bounds lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64) - %map4 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""} - omp.target_exit_data if(%26) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) - llvm.return +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr + %8 = llvm.mlir.constant(5 : i32) : i32 + llvm.store %8, %7 : i32, !llvm.ptr + %9 = llvm.mlir.constant(2 : i32) : i32 + llvm.store %9, %5 : i32, !llvm.ptr + %10 = llvm.load %7 : !llvm.ptr -> i32 + %11 = llvm.mlir.constant(10 : i32) : i32 + %12 = llvm.icmp "slt" %10, %11 : i32 + %13 = llvm.load %5 : !llvm.ptr -> i32 + %14 = llvm.mlir.constant(1023 : index) : i64 + %15 = llvm.mlir.constant(0 : index) : i64 + %16 = llvm.mlir.constant(1024 : index) : i64 + %17 = llvm.mlir.constant(1 : index) : i64 + %18 = omp.map.bounds lower_bound(%15 : i64) upper_bound(%14 : i64) extent(%16 : i64) stride(%17 : i64) start_idx(%17 : i64) + %map1 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%18) -> !llvm.ptr {name = ""} + %19 = llvm.mlir.constant(511 : index) : i64 + %20 = llvm.mlir.constant(0 : index) : i64 + %21 = llvm.mlir.constant(512 : index) : i64 + %22 = llvm.mlir.constant(1 : index) : i64 + %23 = omp.map.bounds lower_bound(%20 : i64) upper_bound(%19 : i64) extent(%21 : i64) stride(%22 : i64) start_idx(%22 : i64) + %map2 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%23) -> !llvm.ptr {name = ""} + omp.target_enter_data if(%12) device(%13 : i32) map_entries(%map1, %map2 : !llvm.ptr, !llvm.ptr) + %24 = llvm.load %7 : !llvm.ptr -> i32 + %25 = llvm.mlir.constant(10 : i32) : i32 + %26 = llvm.icmp "sgt" %24, %25 : i32 + %27 = llvm.load %5 : !llvm.ptr -> i32 + %28 = llvm.mlir.constant(1023 : index) : i64 + %29 = llvm.mlir.constant(0 : index) : i64 + %30 = llvm.mlir.constant(1024 : index) : i64 + %31 = llvm.mlir.constant(1 : index) : i64 + %32 = omp.map.bounds lower_bound(%29 : i64) upper_bound(%28 : i64) extent(%30 : i64) stride(%31 : i64) start_idx(%31 : i64) + %map3 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%32) -> !llvm.ptr {name = ""} + %33 = llvm.mlir.constant(511 : index) : i64 + %34 = llvm.mlir.constant(0 : index) : i64 + %35 = llvm.mlir.constant(512 : index) : i64 + %36 = llvm.mlir.constant(1 : index) : i64 + %37 = omp.map.bounds lower_bound(%34 : i64) upper_bound(%33 : i64) extent(%35 : i64) stride(%36 : i64) start_idx(%36 : i64) + %map4 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%37) -> !llvm.ptr {name = ""} + omp.target_exit_data if(%26) device(%27 : i32) map_entries(%map3, %map4 : !llvm.ptr, !llvm.ptr) + llvm.return + } } // CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4096, i64 2048] @@ -205,18 +211,20 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { // ----- -llvm.func @_QPopenmp_target_use_dev_ptr() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map1 : !llvm.ptr) use_device_ptr(%map2 -> %arg0 : !llvm.ptr) { - %1 = llvm.mlir.constant(10 : i32) : i32 - %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %1, %2 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_ptr() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map1 : !llvm.ptr) use_device_ptr(%map2 -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %1, %2 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 8] @@ -249,18 +257,20 @@ llvm.func @_QPopenmp_target_use_dev_ptr() { // ----- -llvm.func @_QPopenmp_target_use_dev_addr() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { - %1 = llvm.mlir.constant(10 : i32) : i32 - %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %1, %2 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_addr() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %1, %2 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 8] @@ -291,17 +301,19 @@ llvm.func @_QPopenmp_target_use_dev_addr() { // ----- -llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { - %1 = llvm.mlir.constant(10 : i32) : i32 - llvm.store %1, %arg0 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { + %1 = llvm.mlir.constant(10 : i32) : i32 + llvm.store %1, %arg0 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] @@ -331,23 +343,25 @@ llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { // ----- -llvm.func @_QPopenmp_target_use_dev_addr_nomap() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %1 = llvm.mlir.constant(1 : i64) : i64 - %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { - %2 = llvm.mlir.constant(10 : i32) : i32 - %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %2, %3 : i32, !llvm.ptr - %4 = llvm.mlir.constant(20 : i32) : i32 - %5 = llvm.load %b : !llvm.ptr -> !llvm.ptr - llvm.store %4, %5 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_addr_nomap() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %1 = llvm.mlir.constant(1 : i64) : i64 + %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map : !llvm.ptr) use_device_addr(%map2 -> %arg0 : !llvm.ptr) { + %2 = llvm.mlir.constant(10 : i32) : i32 + %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %2, %3 : i32, !llvm.ptr + %4 = llvm.mlir.constant(20 : i32) : i32 + %5 = llvm.load %b : !llvm.ptr -> !llvm.ptr + llvm.store %4, %5 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 8, i64 0] @@ -387,25 +401,27 @@ llvm.func @_QPopenmp_target_use_dev_addr_nomap() { // ----- -llvm.func @_QPopenmp_target_use_dev_both() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %1 = llvm.mlir.constant(1 : i64) : i64 - %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr - %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map1 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - %map3 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%map, %map1 : !llvm.ptr, !llvm.ptr) use_device_addr(%map3 -> %arg0 : !llvm.ptr) use_device_ptr(%map2 -> %arg1 : !llvm.ptr) { - %2 = llvm.mlir.constant(10 : i32) : i32 - %3 = llvm.load %arg1 : !llvm.ptr -> !llvm.ptr - llvm.store %2, %3 : i32, !llvm.ptr - %4 = llvm.mlir.constant(20 : i32) : i32 - %5 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr - llvm.store %4, %5 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_use_dev_both() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %a = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %1 = llvm.mlir.constant(1 : i64) : i64 + %b = llvm.alloca %0 x !llvm.ptr : (i64) -> !llvm.ptr + %map = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map1 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map3 = omp.map.info var_ptr(%b : !llvm.ptr, !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%map, %map1 : !llvm.ptr, !llvm.ptr) use_device_addr(%map3 -> %arg0 : !llvm.ptr) use_device_ptr(%map2 -> %arg1 : !llvm.ptr) { + %2 = llvm.mlir.constant(10 : i32) : i32 + %3 = llvm.load %arg1 : !llvm.ptr -> !llvm.ptr + llvm.store %2, %3 : i32, !llvm.ptr + %4 = llvm.mlir.constant(20 : i32) : i32 + %5 = llvm.load %arg0 : !llvm.ptr -> !llvm.ptr + llvm.store %4, %5 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 8, i64 8] @@ -448,19 +464,21 @@ llvm.func @_QPopenmp_target_use_dev_both() { // ----- -llvm.func @_QPopenmp_target_data_update() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(99 : i32) : i32 - llvm.store %3, %1 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_update() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(99 : i32) : i32 + llvm.store %3, %1 : i32, !llvm.ptr + omp.terminator + } + + omp.target_update map_entries(%2 : !llvm.ptr) + + llvm.return } - - omp.target_update map_entries(%2 : !llvm.ptr) - - llvm.return } // CHECK-LABEL: define void @_QPopenmp_target_data_update @@ -488,26 +506,28 @@ llvm.func @_QPopenmp_target_data_update() { // ----- -omp.declare_mapper @_QQFmy_testmy_mapper : !llvm.struct<"_QFmy_testTmy_type", (i32)> { -^bb0(%arg0: !llvm.ptr): - %0 = llvm.mlir.constant(0 : i32) : i32 - %1 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%data"} - %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%2 : [0] : !llvm.ptr) -> !llvm.ptr {name = "var", partial_map = true} - omp.declare_mapper.info map_entries(%3, %2 : !llvm.ptr, !llvm.ptr) -} - -llvm.func @_QPopenmp_target_data_mapper() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x !llvm.struct<"_QFmy_testTmy_type", (i32)> {bindc_name = "a"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) mapper(@_QQFmy_testmy_mapper) -> !llvm.ptr {name = "a"} - omp.target_data map_entries(%2 : !llvm.ptr) { - %3 = llvm.mlir.constant(10 : i32) : i32 - %4 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> - llvm.store %3, %4 : i32, !llvm.ptr - omp.terminator +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + omp.declare_mapper @_QQFmy_testmy_mapper : !llvm.struct<"_QFmy_testTmy_type", (i32)> { + ^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(0 : i32) : i32 + %1 = llvm.getelementptr %arg0[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var%data"} + %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) members(%2 : [0] : !llvm.ptr) -> !llvm.ptr {name = "var", partial_map = true} + omp.declare_mapper.info map_entries(%3, %2 : !llvm.ptr, !llvm.ptr) + } + + llvm.func @_QPopenmp_target_data_mapper() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x !llvm.struct<"_QFmy_testTmy_type", (i32)> {bindc_name = "a"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)>) map_clauses(tofrom) capture(ByRef) mapper(@_QQFmy_testmy_mapper) -> !llvm.ptr {name = "a"} + omp.target_data map_entries(%2 : !llvm.ptr) { + %3 = llvm.mlir.constant(10 : i32) : i32 + %4 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFmy_testTmy_type", (i32)> + llvm.store %3, %4 : i32, !llvm.ptr + omp.terminator + } + llvm.return } - llvm.return } // CHECK: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] diff --git a/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir b/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir index dba8c553aaca..f5c620a8942d 100644 --- a/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptargetdata-nowait-llvm.mlir @@ -1,13 +1,15 @@ // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s 2>&1 | FileCheck %s -llvm.func @_QPopenmp_target_data_enter() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_enter() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_enter_data map_entries(%2 : !llvm.ptr) nowait + omp.target_enter_data map_entries(%2 : !llvm.ptr) nowait - llvm.return + llvm.return + } } // CHECK: define void @_QPopenmp_target_data_enter() { @@ -32,14 +34,16 @@ llvm.func @_QPopenmp_target_data_enter() { // ----- -llvm.func @_QPopenmp_target_data_update() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_update() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_update map_entries(%2 : !llvm.ptr) nowait + omp.target_update map_entries(%2 : !llvm.ptr) nowait - llvm.return + llvm.return + } } // CHECK: define void @_QPopenmp_target_data_update() { @@ -64,14 +68,16 @@ llvm.func @_QPopenmp_target_data_update() { // ----- -llvm.func @_QPopenmp_target_data_exit() { - %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr - %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { + llvm.func @_QPopenmp_target_data_exit() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target_exit_data map_entries(%2 : !llvm.ptr) nowait + omp.target_exit_data map_entries(%2 : !llvm.ptr) nowait - llvm.return + llvm.return + } } // CHECK: define void @_QPopenmp_target_data_exit() { diff --git a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir index 717a77e61b9a..53c9b4f55964 100644 --- a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir +++ b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir @@ -3,7 +3,7 @@ // This tests checks that a target op inside a data op // We are only interested in ensuring that the -mlir-to-llmvir pass doesn't crash. // CHECK: {{.*}} = add i32 {{.*}}, 1 -module attributes { } { +module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget} { %0 = llvm.mlir.constant(99 : index) : i64