Files
clang-p2996/llvm/test/CodeGen/AMDGPU/smrd-gfx10.ll
Jay Foad 2be3189601 [AMDGPU] Don't select _SGPR forms of SMEM instructions on GFX9+
On GFX9+, SMEM instructions have an _SGPR_IMM form which is strictly
more powerful than the _SGPR form. It simplifies codegen if we always
select the _SGPR_IMM form with an immediate offset of 0 instead of the
_SGPR form.

Note that this patch just makes minimal changes to the selection
patterns to prove the concept. Further simplifications are possible to
reduced the number of selection patterns.

On GFX9 the _SGPR form of the Real instruction is still required for
assembly/disassembly but on GFX10+ it can be removed completely.

Differential Revision: https://reviews.llvm.org/D147334
2023-04-17 16:23:30 +01:00

43 lines
2.0 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; GCN-LABEL: {{^}}smrd_imm_dlc:
; GFX10: s_buffer_load_dword s0, s[0:3], 0x0 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0xfa]
; GFX11: s_buffer_load_b32 s0, s[0:3], 0x0 dlc ; encoding: [0x00,0x20,0x20,0xf4,0x00,0x00,0x00,0xf8]
define amdgpu_ps float @smrd_imm_dlc(<4 x i32> inreg %desc) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 4)
ret float %r
}
; GCN-LABEL: {{^}}smrd_sgpr_dlc:
; GFX10: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0x08]
; GFX11: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 dlc ; encoding: [0x00,0x20,0x20,0xf4,0x00,0x00,0x00,0x08]
define amdgpu_ps float @smrd_sgpr_dlc(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 4)
ret float %r
}
; GCN-LABEL: {{^}}smrd_imm_glc_dlc:
; GFX10: s_buffer_load_dword s0, s[0:3], 0x0 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0xfa]
; GFX11: s_buffer_load_b32 s0, s[0:3], 0x0 glc dlc ; encoding: [0x00,0x60,0x20,0xf4,0x00,0x00,0x00,0xf8]
define amdgpu_ps float @smrd_imm_glc_dlc(<4 x i32> inreg %desc) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 5)
ret float %r
}
; GCN-LABEL: {{^}}smrd_sgpr_glc_dlc:
; GFX10: s_buffer_load_dword s0, s[0:3], s4 offset:0x0 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0x08]
; GFX11: s_buffer_load_b32 s0, s[0:3], s4 offset:0x0 glc dlc ; encoding: [0x00,0x60,0x20,0xf4,0x00,0x00,0x00,0x08]
define amdgpu_ps float @smrd_sgpr_glc_dlc(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
main_body:
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 5)
ret float %r
}
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32)
!0 = !{}