[ValueTracking] Return true for AddrSpaceCast in canCreateUndefOrPoison (#144686)
In our downstream GPU target, the following IR is valid before instcombine,
although the second addrspacecast causes UB:
define i1 @test(ptr addrspace(1) noundef %v) {
%0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
%1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
%2 = icmp eq i32 %1, 0
%3 = addrspacecast ptr addrspace(4) %0 to ptr addrspace(3)
%4 = select i1 %2, ptr addrspace(3) null, ptr addrspace(3) %3
%5 = icmp eq ptr addrspace(3) %4, null
ret i1 %5
}
We have a custom optimization that replaces an invalid addrspacecast with
poison, and the IR is still valid since `select` stops poison propagation.
However, the instcombine pass optimizes the `select` to an `or`:
%0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
%1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
%2 = icmp eq i32 %1, 0
%3 = addrspacecast ptr addrspace(1) %v to ptr addrspace(3)
%4 = icmp eq ptr addrspace(3) %3, null
%5 = or i1 %2, %4
ret i1 %5
The transform is invalid for our target: unlike `select`, `or` propagates
poison from the invalid addrspacecast to the result.
---------
Co-authored-by: Nikita Popov <github@npopov.com>
This commit is contained in:
@@ -44,7 +44,7 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) {
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]]
|
||||
// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
|
||||
// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5))
|
||||
// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]]
|
||||
// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]]
|
||||
// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]]
|
||||
// CHECK: if.then.i:
|
||||
// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16
|
||||
|
||||
@@ -67,7 +67,7 @@ int3 f8(char16 x) {
|
||||
return __builtin_astype(x, int3);
|
||||
}
|
||||
|
||||
//CHECK: define{{.*}} spir_func noundef ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
|
||||
//CHECK: define{{.*}} spir_func ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
|
||||
//CHECK: %[[cast:.*]] ={{.*}} addrspacecast ptr %[[x]] to ptr addrspace(1)
|
||||
//CHECK: ret ptr addrspace(1) %[[cast]]
|
||||
global int* addr_cast(int *x) {
|
||||
|
||||
@@ -12627,6 +12627,9 @@ result pointer is dereferenceable, the cast is assumed to be
|
||||
reversible (i.e. casting the result back to the original address space
|
||||
should yield the original bit pattern).
|
||||
|
||||
Which address space casts are supported depends on the target. Unsupported
|
||||
address space casts return :ref:`poison <poisonvalues>`.
|
||||
|
||||
Example:
|
||||
""""""""
|
||||
|
||||
|
||||
@@ -7486,6 +7486,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
|
||||
case Instruction::FCmp:
|
||||
case Instruction::GetElementPtr:
|
||||
return false;
|
||||
case Instruction::AddrSpaceCast:
|
||||
return true;
|
||||
default: {
|
||||
const auto *CE = dyn_cast<ConstantExpr>(Op);
|
||||
if (isa<CastInst>(Op) || (CE && CE->isCast()))
|
||||
|
||||
@@ -8,7 +8,7 @@ define amdgpu_kernel void @__omp_offloading_fd00_2c00523__ZN11qmcplusplus7ompBLA
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
|
||||
; CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[TMP1]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree noundef readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
|
||||
|
||||
25
llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
Normal file
25
llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll
Normal file
@@ -0,0 +1,25 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
|
||||
|
||||
; Check that `select B, true, C` isn't optimized to `or B, C`,
|
||||
; because the invalid addrspacecast %asc.shared introduces poison.
|
||||
define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
|
||||
; CHECK-LABEL: define i1 @not_fold_select(
|
||||
; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null
|
||||
; CHECK-NEXT: [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]]
|
||||
; CHECK-NEXT: ret i1 [[TMP4]]
|
||||
;
|
||||
entry:
|
||||
%asc.flat = addrspacecast ptr addrspace(1) %x to ptr
|
||||
%is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat)
|
||||
%asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3)
|
||||
%shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null
|
||||
%result = icmp eq ptr addrspace(3) %shared.addr, null
|
||||
ret i1 %result
|
||||
}
|
||||
Reference in New Issue
Block a user