1. Remove the existing code that would encode the constant offsets (if there were any) on buffer intrinsic operations onto their `MachineMemOperand`s. As far as I can tell, this use of `offset` has no substantial impact on the generated code, especially since the same reasoning is performed by areMemAccessesTriviallyDisjoint(). 2. When a buffer resource intrinsic takes a pointer argument as the base resource/descriptor, place that memory argument in the value field of the MachineMemOperand attached to that intrinsic. This is more conservative than what would be produced by more typical LLVM code using GEP, as the Value (for alias analysis purposes) corresponding to accessing buffer[0] and buffer[1] is the same. However, the target-specific analysis of disjoint offsets covers a lot of the simple use cases. Despite this limitation, the new buffer intrinsics, combined with LLVM's existing pointer annotations, allow for non-trivial optimizations, as seen in the new tests, where marking two buffer descriptors "noalias" allows merging together loads and stores in a "load from A, modify loaded value, store to B" sequence, which was not possible previously. Depends on D147547. Reviewed By: arsenm. Differential Revision: https://reviews.llvm.org/D148184
35 lines
2.3 KiB
LLVM
35 lines
2.3 KiB
LLVM
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after finalize-isel -o %t.mir %s
|
|
; RUN: llc -run-pass=none -verify-machineinstrs %t.mir -o - | FileCheck %s
|
|
|
|
; Test that custom pseudo source values can be round trip serialized through MIR.
|
|
|
|
; CHECK-LABEL: {{^}}name: shader
|
|
; CHECK: %[[#]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed %[[#]], %[[#]], 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.arg3, align 1, addrspace 8)
|
|
; CHECK: IMAGE_STORE_V4_V3_nsa_gfx10 killed %[[#]], %[[#]], %[[#]], %[[#]], killed %[[#]], 15, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
|
|
; CHECK: DS_GWS_BARRIER %[[#]], 63, implicit $m0, implicit $exec :: (load (s32) from custom "GWSResource")
|
|
; Compute-shader entry point whose selected machine instructions carry the
; memory operands this test round-trips through MIR: a buffer load based on
; %arg3, an image store, and a GWS barrier.
;   %arg0, %arg1 - i32 values forwarded to the image store call.
;   %arg2        - <8 x i32> (inreg) operand forwarded to the image store call.
;   %arg3        - ptr addrspace(8) (inreg) used as the base of every buffer load.
define amdgpu_cs void @shader(i32 %arg0, i32 %arg1, <8 x i32> inreg %arg2, ptr addrspace(8) inreg %arg3) {
|
|
; Four raw buffer loads from %arg3; only the second operand differs (4, 8, 12, 16).
; The value 4 is the immediate that appears in the expected
; BUFFER_LOAD_DWORD_OFFSET line, presumably a byte offset (TODO confirm).
%bload0 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %arg3, i32 4, i32 0, i32 0)
|
|
%bload1 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %arg3, i32 8, i32 0, i32 0)
|
|
%bload2 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %arg3, i32 12, i32 0, i32 0)
|
|
%bload3 = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %arg3, i32 16, i32 0, i32 0)
|
|
; Reinterpret each loaded i32 as a float.
%bload0.f = bitcast i32 %bload0 to float
|
|
%bload1.f = bitcast i32 %bload1 to float
|
|
%bload2.f = bitcast i32 %bload2 to float
|
|
%bload3.f = bitcast i32 %bload3 to float
|
|
; Build the <4 x float> store payload. All four lanes are filled from
; %bload0.f; %bload1.f, %bload2.f and %bload3.f have no users in this function.
; NOTE(review): presumably intentional, so that only one buffer load reaches
; the checked output - confirm before "fixing" the lane sources.
%istore0 = insertelement <4 x float> undef, float %bload0.f, i32 0
|
|
%istore1 = insertelement <4 x float> %istore0, float %bload0.f, i32 1
|
|
%istore2 = insertelement <4 x float> %istore1, float %bload0.f, i32 2
|
|
%istore3 = insertelement <4 x float> %istore2, float %bload0.f, i32 3
|
|
; 3D image store of the payload, with %arg0/%arg1 and a constant 0 as the three
; i32 operands and %arg2 as the <8 x i32> operand. The test expects an
; IMAGE_STORE_V4_V3_nsa_gfx10 with a "store (s128)" memory operand for it.
call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %istore3, i32 15, i32 %arg0, i32 %arg1, i32 0, <8 x i32> %arg2, i32 0, i32 0)
|
|
; GWS barrier taking the first loaded value and the constant 63. The test
; expects a DS_GWS_BARRIER whose memory operand is the custom "GWSResource"
; pseudo source value.
call void @llvm.amdgcn.ds.gws.barrier(i32 %bload0, i32 63)
|
|
ret void
|
|
}
|
|
|
|
; Declarations of the three AMDGPU intrinsics called by @shader, each tied to
; one of the attribute groups defined below.
; Image store: value, three immarg-free i32 operands after the first immarg,
; an <8 x i32> operand, and two trailing immarg i32 operands.
declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
|
|
; Raw buffer load taking a ptr addrspace(8) base; only the last i32 is immarg.
declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32 immarg) #1
|
|
; GWS barrier taking two i32 operands.
declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #2
|
|
|
|
; #0 (image store): write-only, does not unwind, always returns.
attributes #0 = { nounwind willreturn writeonly }
|
|
; #1 (buffer load): only reads memory reachable from its pointer argument.
; NOTE(review): presumably this is what lets the load's memory operand name
; %arg3 as its base value - confirm against the backend.
attributes #1 = { nounwind memory(argmem: read) willreturn }
|
|
; #2 (GWS barrier): convergent, and touches only memory inaccessible to the module.
attributes #2 = { convergent inaccessiblememonly nounwind }
|