[ValueTracking] Return true for AddrSpaceCast in canCreateUndefOrPoison (#144686)

In our downstream GPU target, the following IR is valid before instcombine,
although the second addrspacecast causes UB.
  define i1 @test(ptr addrspace(1) noundef %v) {
    %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
    %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
    %2 = icmp eq i32 %1, 0
    %3 = addrspacecast ptr addrspace(4) %0 to ptr addrspace(3)
    %4 = select i1 %2, ptr addrspace(3) null, ptr addrspace(3) %3
    %5 = icmp eq ptr addrspace(3) %4, null
    ret i1 %5
  }
We have a custom optimization that replaces an invalid addrspacecast with
poison, and the IR is still valid since `select` stops poison propagation.

However, the instcombine pass optimizes the `select` to an `or`:
    %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4)
    %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0)
    %2 = icmp eq i32 %1, 0
    %3 = addrspacecast ptr addrspace(1) %v to ptr addrspace(3)
    %4 = icmp eq ptr addrspace(3) %3, null
    %5 = or i1 %2, %4
    ret i1 %5
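The transform is invalid for our target: unlike `select`, `or` propagates
poison from either operand, so the poison produced by the invalid
addrspacecast now reaches the result.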

---------

Co-authored-by: Nikita Popov <github@npopov.com>
This commit is contained in:
Wenju He
2025-06-24 08:43:47 +08:00
committed by GitHub
parent a314ac4d22
commit 9d570d568b
6 changed files with 33 additions and 3 deletions

View File

@@ -44,7 +44,7 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) {
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5))
// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]]
// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]]
// CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]]
// CHECK: if.then.i:
// CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16

View File

@@ -67,7 +67,7 @@ int3 f8(char16 x) {
return __builtin_astype(x, int3);
}
//CHECK: define{{.*}} spir_func noundef ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
//CHECK: define{{.*}} spir_func ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]])
//CHECK: %[[cast:.*]] ={{.*}} addrspacecast ptr %[[x]] to ptr addrspace(1)
//CHECK: ret ptr addrspace(1) %[[cast]]
global int* addr_cast(int *x) {

View File

@@ -12627,6 +12627,9 @@ result pointer is dereferenceable, the cast is assumed to be
reversible (i.e. casting the result back to the original address space
should yield the original bit pattern).
Which address space casts are supported depends on the target. Unsupported
address space casts return :ref:`poison <poisonvalues>`.
Example:
""""""""

View File

@@ -7486,6 +7486,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
case Instruction::FCmp:
case Instruction::GetElementPtr:
return false;
case Instruction::AddrSpaceCast:
return true;
default: {
const auto *CE = dyn_cast<ConstantExpr>(Op);
if (isa<CastInst>(Op) || (CE && CE->isCast()))

View File

@@ -8,7 +8,7 @@ define amdgpu_kernel void @__omp_offloading_fd00_2c00523__ZN11qmcplusplus7ompBLA
; CHECK-NEXT: [[TMP1:%.*]] = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
; CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree noundef readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false)
; CHECK-NEXT: ret void
;
%1 = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5)

View File

@@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s
; Check that `select B, true, C` isn't optimized to `or B, C`,
; because the invalid addrspacecast %asc.shared introduces poison.
; A `select` only forwards poison from the arm it actually picks, whereas an
; `or` is poison if either operand is, so the fold is only legal when C is
; known not to be poison. Since an addrspacecast may produce poison, the
; expected output below must keep the `select` form.
define i1 @not_fold_select(ptr addrspace(1) noundef %x) {
; CHECK-LABEL: define i1 @not_fold_select(
; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr
; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3)
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null
; CHECK-NEXT: [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]]
; CHECK-NEXT: ret i1 [[TMP4]]
;
entry:
; Cast the addrspace(1) argument to the default address space (plain `ptr`).
%asc.flat = addrspacecast ptr addrspace(1) %x to ptr
; Query the target intrinsic on the casted pointer; presumably it reports
; whether the address lies in shared memory (verify against the AMDGPU docs).
%is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat)
; This cast to addrspace(3) is the one that may be unsupported and hence poison.
%asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3)
; Guard: the possibly-poison cast result is only used when %is.shared is true.
%shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null
; The null comparison is what instcombine turns into the boolean select above.
%result = icmp eq ptr addrspace(3) %shared.addr, null
ret i1 %result
}