Codegen for the raw/struct buffer access intrinsics would update the offset in the MMO to reflect the combined offset, if it was known to be constant. If the combined offset was not known to be constant, or if there was an index, it would set the offset in the MMO to 0. This is unsafe because it makes it look like the access does not alias with another access with a fixed non-zero offset. Fix these cases by setting the pointer in the MMO to null, to reflect the fact that we do not have any known IR value pointer + constant offset for the access. Differential Revision: https://reviews.llvm.org/D106284
37 lines
2.3 KiB
LLVM
37 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s -stop-after=amdgpu-isel | FileCheck -check-prefix=GCN %s
|
|
|
|
; We want to see a BUFFER_LOAD, some register shuffling, and a BUFFER_STORE.
|
|
; Specifically, we do not want to see a BUFFER_STORE that says "store into
|
|
; stack" in the middle.
|
|
|
|
define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
|
|
; GCN-LABEL: name: main
|
|
; GCN: bb.0.main_body:
|
|
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
|
; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
|
|
; GCN: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
|
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4)
|
|
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
|
|
; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
|
|
; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
|
|
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2
|
|
; GCN: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
|
|
; GCN: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
|
|
; GCN: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
|
|
; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4)
|
|
; GCN: S_ENDPGM 0
|
|
main_body:
|
|
%tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
|
|
%tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
|
|
%tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
|
|
%tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
|
|
call void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32> %tmp29, <4 x i32> undef, i32 undef, i32 0, i32 0) #3
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg)
|
|
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg)
|