From 7df2eba7fac2281498092dedea67d85d4f7ecfd5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 11 Sep 2021 18:34:47 -0500 Subject: [PATCH] [Attributor][OpenMP] Add assumption for non-call assembly instructions Inline assembly is scary but we need to support it for the OpenMP GPU device runtime. The new assumption expresses the fact that it may not have call semantics, that is, it will not call another function but simply perform an operation or side-effect. This is important for reachability in the presence of inline assembly. Differential Revision: https://reviews.llvm.org/D109986 --- llvm/include/llvm/IR/Assumptions.h | 4 ++ llvm/lib/IR/Assumptions.cpp | 1 + .../Transforms/IPO/AttributorAttributes.cpp | 5 +- .../Transforms/Attributor/reachability.ll | 49 +++++++++++++++++++ openmp/libomptarget/DeviceRTL/include/Types.h | 9 ++++ 5 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Attributor/reachability.ll diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h index 08e6c8b6f1e0..2d2ecfbde6e6 100644 --- a/llvm/include/llvm/IR/Assumptions.h +++ b/llvm/include/llvm/IR/Assumptions.h @@ -34,6 +34,10 @@ extern StringSet<> KnownAssumptionStrings; /// Helper that allows to insert a new assumption string in the known assumption /// set by creating a (static) object. 
struct KnownAssumptionString { + KnownAssumptionString(const char *AssumptionStr) + : AssumptionStr(AssumptionStr) { + KnownAssumptionStrings.insert(AssumptionStr); + } KnownAssumptionString(StringRef AssumptionStr) : AssumptionStr(AssumptionStr) { KnownAssumptionStrings.insert(AssumptionStr); diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp index 3d24ae062841..27977d5d56b0 100644 --- a/llvm/lib/IR/Assumptions.cpp +++ b/llvm/lib/IR/Assumptions.cpp @@ -107,4 +107,5 @@ StringSet<> llvm::KnownAssumptionStrings({ "omp_no_openmp_routines", // OpenMP 5.1 "omp_no_parallelism", // OpenMP 5.1 "ompx_spmd_amenable", // OpenMPOpt extension + "ompx_no_call_asm", // OpenMPOpt extension }); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 9328e7eb44c4..61eb0e43700d 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -9540,7 +9541,9 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { CallBase *CB = cast<CallBase>(getCtxI()); if (CB->isInlineAsm()) { - setHasUnknownCallee(false, Change); + if (!hasAssumption(*CB->getCaller(), "ompx_no_call_asm") && + !hasAssumption(*CB, "ompx_no_call_asm")) + setHasUnknownCallee(false, Change); return Change; } diff --git a/llvm/test/Transforms/Attributor/reachability.ll b/llvm/test/Transforms/Attributor/reachability.ll new file mode 100644 index 000000000000..989fd4920554 --- /dev/null +++ b/llvm/test/Transforms/Attributor/reachability.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -attributor
-enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM + +define void @non_recursive_asm_fn() #0 { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define {{[^@]+}}@non_recursive_asm_fn +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void asm sideeffect "barrier.sync $0 +; CHECK-NEXT: ret void +; +entry: + call void asm sideeffect "barrier.sync $0;", "r,~{memory},~{dirflag},~{fpsr},~{flags}"(i32 1) + ret void +} + +define void @non_recursive_asm_cs() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define {{[^@]+}}@non_recursive_asm_cs +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void asm sideeffect "barrier.sync $0 +; CHECK-NEXT: ret void +; +entry: + call void asm sideeffect "barrier.sync $0;", "r,~{memory},~{dirflag},~{fpsr},~{flags}"(i32 1) #0 + ret void +} + +define void @recursive_asm() { +; CHECK-LABEL: define {{[^@]+}}@recursive_asm() { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void asm sideeffect 
"barrier.sync $0 +; CHECK-NEXT: ret void +; +entry: + call void asm sideeffect "barrier.sync $0;", "r,~{memory},~{dirflag},~{fpsr},~{flags}"(i32 1) + ret void +} + +attributes #0 = { "llvm.assume"="ompx_no_call_asm" } +;. +; CHECK: attributes #[[ATTR0]] = { norecurse "llvm.assume"="ompx_no_call_asm" } +; CHECK: attributes #[[ATTR1]] = { norecurse } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { "llvm.assume"="ompx_no_call_asm" } +;. diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h index 0ff0cee66b3f..34f3e924a3a2 100644 --- a/openmp/libomptarget/DeviceRTL/include/Types.h +++ b/openmp/libomptarget/DeviceRTL/include/Types.h @@ -12,6 +12,15 @@ #ifndef OMPTARGET_TYPES_H #define OMPTARGET_TYPES_H +// Tell the compiler that we do not have any "call-like" inline assembly in the +// device runtime. That means we cannot have inline assembly which will call +// another function but only inline assembly that performs some operation or +// side-effect and then continues execution with something on the existing call +// stack. +// +// TODO: Find a good place for this +#pragma omp assumes ext_no_call_asm + /// Base type declarations for freestanding mode /// ///{