From 9d570d568b37249ddcff8319c5965c4bdc4f7ca2 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 24 Jun 2025 08:43:47 +0800 Subject: [PATCH] [ValueTracking] Return true for AddrSpaceCast in canCreateUndefOrPoison (#144686) In our downstream GPU target, the following IR is valid before instcombine, although the second addrspacecast causes UB. define i1 @test(ptr addrspace(1) noundef %v) { %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4) %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0) %2 = icmp eq i32 %1, 0 %3 = addrspacecast ptr addrspace(4) %0 to ptr addrspace(3) %4 = select i1 %2, ptr addrspace(3) null, ptr addrspace(3) %3 %5 = icmp eq ptr addrspace(3) %4, null ret i1 %5 } We have a custom optimization that replaces invalid addrspacecasts with poison, and the IR is still valid since `select` stops poison propagation. However, the instcombine pass optimizes `select` to `or`: %0 = addrspacecast ptr addrspace(1) %v to ptr addrspace(4) %1 = call i32 @llvm.xxxx.isaddr.shared(ptr addrspace(4) %0) %2 = icmp eq i32 %1, 0 %3 = addrspacecast ptr addrspace(1) %v to ptr addrspace(3) %4 = icmp eq ptr addrspace(3) %3, null %5 = or i1 %2, %4 ret i1 %5 The transform is invalid for our target: unlike `select`, `or` propagates poison from either operand, so once the invalid addrspacecast %3 is replaced with poison the result %5 is poison as well. 
--------- Co-authored-by: Nikita Popov --- .../CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl | 2 +- clang/test/CodeGenOpenCL/as_type.cl | 2 +- llvm/docs/LangRef.rst | 3 +++ llvm/lib/Analysis/ValueTracking.cpp | 2 ++ .../aapointer_info_map_invalidation.ll | 2 +- .../InstCombine/AMDGPU/addrspacecast.ll | 25 +++++++++++++++++++ 6 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll diff --git a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl index 62fd20c4d141..f9d7968fc557 100644 --- a/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl +++ b/clang/test/CodeGenOpenCL/amdgcn-buffer-rsrc-type.cl @@ -44,7 +44,7 @@ void consumeBufferPtr(__amdgpu_buffer_rsrc_t *p) { // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A]], align 16, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr addrspace(5) [[A]], addrspacecast (ptr null to ptr addrspace(5)) -// CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TOBOOL_NOT_I]], [[TOBOOL_NOT]] +// CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL_NOT]], i1 true, i1 [[TOBOOL_NOT_I]] // CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END:%.*]], label [[IF_THEN_I:%.*]] // CHECK: if.then.i: // CHECK-NEXT: [[R:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[A]], i32 16 diff --git a/clang/test/CodeGenOpenCL/as_type.cl b/clang/test/CodeGenOpenCL/as_type.cl index 2c6cdc3810b4..33e34b03a563 100644 --- a/clang/test/CodeGenOpenCL/as_type.cl +++ b/clang/test/CodeGenOpenCL/as_type.cl @@ -67,7 +67,7 @@ int3 f8(char16 x) { return __builtin_astype(x, int3); } -//CHECK: define{{.*}} spir_func noundef ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]]) +//CHECK: define{{.*}} spir_func ptr addrspace(1) @addr_cast(ptr noundef readnone captures(ret: address, provenance) %[[x:.*]]) //CHECK: %[[cast:.*]] ={{.*}} 
addrspacecast ptr %[[x]] to ptr addrspace(1) //CHECK: ret ptr addrspace(1) %[[cast]] global int* addr_cast(int *x) { diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index cc72a37f6859..f9ee3243eea9 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12627,6 +12627,9 @@ result pointer is dereferenceable, the cast is assumed to be reversible (i.e. casting the result back to the original address space should yield the original bit pattern). +Which address space casts are supported depends on the target. Unsupported +address space casts return :ref:`poison <poisonvalues>`. + Example: """""""" diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 3df9af4bc95f..2389322a2d54 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7486,6 +7486,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, case Instruction::FCmp: case Instruction::GetElementPtr: return false; + case Instruction::AddrSpaceCast: + return true; default: { const auto *CE = dyn_cast<ConstantExpr>(Op); if (isa<CastInst>(Op) || (CE && CE->isCast())) diff --git a/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll b/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll index 9a5f789e88fa..02c047f442af 100644 --- a/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll +++ b/llvm/test/Transforms/Attributor/reduced/aapointer_info_map_invalidation.ll @@ -8,7 +8,7 @@ define amdgpu_kernel void @__omp_offloading_fd00_2c00523__ZN11qmcplusplus7ompBLA ; CHECK-NEXT: [[TMP1:%.*]] = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5) ; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr ; CHECK-NEXT: store ptr [[TMP2]], ptr addrspace(5) [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree noundef readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef 
false) +; CHECK-NEXT: [[TMP3:%.*]] = call fastcc i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr nofree readonly align 8 captures(none) dereferenceable_or_null(8) [[TMP2]], i1 noundef false) ; CHECK-NEXT: ret void ; %1 = alloca [0 x [0 x float]], i32 0, align 8, addrspace(5) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll new file mode 100644 index 000000000000..0be7d5a04b2e --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AMDGPU/addrspacecast.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine %s | FileCheck %s + +; Check that `select B, true, C` isn't optimized to `or B, C`, +; because the invalid addrspacecast %asc.shared introduces poison. +define i1 @not_fold_select(ptr addrspace(1) noundef %x) { +; CHECK-LABEL: define i1 @not_fold_select( +; CHECK-SAME: ptr addrspace(1) noundef [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[X]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq ptr addrspace(3) [[TMP2]], null +; CHECK-NEXT: [[NOT_IS_SHARED:%.*]] = xor i1 [[TMP1]], true +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[NOT_IS_SHARED]], i1 true, i1 [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] +; + entry: + %asc.flat = addrspacecast ptr addrspace(1) %x to ptr + %is.shared = tail call i1 @llvm.amdgcn.is.shared(ptr %asc.flat) + %asc.shared = addrspacecast ptr %asc.flat to ptr addrspace(3) + %shared.addr = select i1 %is.shared, ptr addrspace(3) %asc.shared, ptr addrspace(3) null + %result = icmp eq ptr addrspace(3) %shared.addr, null + ret i1 %result +}