clang-p2996/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
Commit 8a52bd82e3 by Jay Foad: [AMDGPU] Only select VOP3 forms of VOP2 instructions
Change VOP_PAT_GEN so that by default it generates an instruction
selection pattern only for the VOP3 (e64) form of an instruction, not
for the VOP2 (e32) form. This gives SIFoldOperands maximum freedom to
fold copies into the operands of an instruction before
SIShrinkInstructions tries to shrink it back to the smaller encoding.

This affects the following VOP2 instructions:
v_min_i32
v_max_i32
v_min_u32
v_max_u32
v_and_b32
v_or_b32
v_xor_b32
v_lshr_b32
v_ashr_i32
v_lshl_b32

A further cleanup could simplify or remove VOP_PAT_GEN, since its
optional second argument is never used.

Differential Revision: https://reviews.llvm.org/D114252
2021-11-24 11:15:30 +00:00
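
For context, a minimal illustration of the two encodings of the same operation
(not taken from this test; the register numbers are arbitrary). The VOP2 (e32)
form only accepts a VGPR in src1, while the VOP3 (e64) form also accepts an
SGPR there, which is what gives SIFoldOperands room to fold a copy from an
SGPR before the encoding is shrunk again:

    v_and_b32_e64 v0, v1, s2   ; 64-bit encoding: an SGPR is allowed in src1
    v_and_b32_e32 v0, s2, v1   ; 32-bit encoding: SIShrinkInstructions can
                               ; recover it here by commuting the operands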

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
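
; The tests feed (x << (32 - width)) >> (32 - width) through the backend.
; With a logical shift right this extracts the low `width` bits of x
; zero-extended, and with an arithmetic shift right the same bits
; sign-extended, i.e. what v_bfe_u32 / v_bfe_i32 with offset 0 compute.
; The divergent (VGPR) single-use cases are expected to select a single BFE,
; while the multi-use and uniform (SGPR) cases keep the shift pair.
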
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = lshr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[WIDTH]]
define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
; GCN: [[BFE]]
; GCN: [[SHL]]
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
  %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %src = load volatile i32, i32 addrspace(1)* %in0.gep
  %width = load volatile i32, i32 addrspace(1)* %in0.gep
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[TMP:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[TMP]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32:
; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}}
; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]]
; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]]
; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]]
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %sub = sub i32 32, %width
  %shl = shl i32 %src, %sub
  %bfe = ashr i32 %shl, %sub
  store i32 %bfe, i32 addrspace(1)* %out.gep
  store volatile i32 %shl, i32 addrspace(1)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }