Files
clang-p2996/llvm/test/CodeGen/AMDGPU/replace-lds-by-ptr-use-within-phi-inst.ll
Jon Chesterfield 50ad3478bd Disable ReplaceLDS pass, patch up tests to match
Most tests passed with an extra argument to explicitly enable the pass.
One does not, deleted it as part of this change. I can't see why the codegen
would be different between default on and default off but switched on. It
can be retrieved from the project history.

This would be a revert, but git revert was not clean. Disabling the pass
and leaving it in tree is less likely to cause breakage elsewhere than
patching up the git revert conflicts on unfamiliar code. It'll be landed
without review, as @hsmhsm is believed unavailable at present.

Differential Revision: https://reviews.llvm.org/D104962
2021-06-26 01:36:42 +01:00

94 lines
3.4 KiB
LLVM

; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
; DESCRIPTION:
;
; Replace lds globals used within phi instruction.
;
; Original LDS should exist.
; CHECK: @lds.1 = addrspace(3) global i32 undef, align 4
; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4
@lds.1 = addrspace(3) global i32 undef, align 4
@lds.2 = addrspace(3) global i32 undef, align 4
; Pointers should be created.
; CHECK: @lds.1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; CHECK: @lds.2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
define void @f0(i32 %arg) {
; CHECK-LABEL: bb:
; CHECK: %0 = load i16, i16 addrspace(3)* @lds.2.ptr, align 2
; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK: %2 = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
; CHECK: %3 = load i16, i16 addrspace(3)* @lds.1.ptr, align 2
; CHECK: %4 = getelementptr i8, i8 addrspace(3)* null, i16 %3
; CHECK: %5 = bitcast i8 addrspace(3)* %4 to i32 addrspace(3)*
; CHECK: %id = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK: %my.tmp = sub i32 %id, %arg
; CHECK: br label %bb1
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
%my.tmp = sub i32 %id, %arg
br label %bb1
; CHECK-LABEL: bb1:
; CHECK: %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
; CHECK: %6 = icmp ne i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), %5
; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
; CHECK: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; CHECK: br i1 %cmp0, label %bb4, label %Flow
bb1:
%lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
%lsr.iv.next = add i32 %lsr.iv, 1
%cmp0 = icmp slt i32 %lsr.iv.next, 0
br i1 %cmp0, label %bb4, label %Flow
; CHECK-LABEL: bb4:
; CHECK: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK: %cmp1 = icmp sge i32 %my.tmp, %load
; CHECK: br label %Flow
bb4:
%load = load volatile i32, i32 addrspace(1)* undef, align 4
%cmp1 = icmp sge i32 %my.tmp, %load
br label %Flow
; CHECK-LABEL: Flow:
; CHECK: %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; CHECK: %my.tmp3 = phi i32 addrspace(3)* [ %2, %bb4 ], [ %5, %bb1 ]
; CHECK: %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ %6, %bb1 ]
; CHECK: br i1 %my.tmp4, label %bb9, label %bb1
Flow:
%my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
%my.tmp3 = phi i32 addrspace(3)* [@lds.2, %bb4 ], [ @lds.1, %bb1 ]
%my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds.1), %bb1 ]
br i1 %my.tmp4, label %bb9, label %bb1
; CHECK-LABEL: bb9:
; CHECK: store volatile i32 7, i32 addrspace(3)* undef, align 4
; CHECK: ret void
bb9:
store volatile i32 7, i32 addrspace(3)* undef
ret void
}
; CHECK-LABEL: @k0
; CHECK: %1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK: %2 = icmp eq i32 %1, 0
; CHECK: br i1 %2, label %3, label %4
;
; CHECK-LABEL: 3:
; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.2 to i16), i16 addrspace(3)* @lds.2.ptr, align 2
; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.1 to i16), i16 addrspace(3)* @lds.1.ptr, align 2
; CHECK: br label %4
;
; CHECK-LABEL: 4:
; CHECK: call void @llvm.amdgcn.wave.barrier()
; CHECK: call void @f0(i32 %arg)
; CHECK: ret void
define amdgpu_kernel void @k0(i32 %arg) {
call void @f0(i32 %arg)
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()