[AMDGPU] Extend v2i16 & v2f16 support for llvm.amdgcn.update.dpp intr (#65318)
Authored-by: Pravin Jagtap <Pravin.Jagtap@amd.com>
This commit is contained in:
@@ -1214,6 +1214,8 @@ class UpdateDPPPat<ValueType vt> : GCNPat <
|
||||
|
||||
// Instantiate the update.dpp pattern once for every value type handled here:
// the scalar 32-bit types plus the packed 16-bit vector types.
foreach Ty = [i32, f32, v2i16, v2f16] in
  def : UpdateDPPPat<Ty>;
|
||||
|
||||
} // End OtherPredicates = [isGFX8Plus]
|
||||
|
||||
|
||||
@@ -221,9 +221,245 @@ define amdgpu_kernel void @dpp_test_f32_imm_comb8(ptr addrspace(1) %out, float %
|
||||
ret void
|
||||
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (1, 1, 1) and bound_ctrl off.
; Both kernel arguments are copied into VGPRs and a single DPP move is expected.
; GCN-LABEL: {{^}}dpp_test_v2i16:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test_v2i16(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 1, i32 1, i32 1, i1 false)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with all-zero immediates and bound_ctrl off.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb1:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb1(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 0, i32 0, i32 0, i1 false)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (3, 3, 3) and bound_ctrl off.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb2:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb2(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 3, i32 3, i32 3, i1 false)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with three distinct immediates (1, 2, 3) and
; bound_ctrl on, so each immediate can be told apart in the printed operands.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb3:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb3(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 1, i32 2, i32 3, i1 true)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (4, 3, 2) and bound_ctrl on.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb4:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb4(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 4, i32 3, i32 2, i1 true)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (63, 62, 61) and bound_ctrl on.
; The expected row and bank masks show only the low four bits of 62 and 61.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb5:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb5(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 62, i32 61, i1 true)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (63, 63, 63) and bound_ctrl on.
; Both expected masks come out as 0xf.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb6:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb6(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 63, i32 63, i1 true)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (64, 64, 64) and bound_ctrl on.
; The expected masks are both zero while the last quad_perm entry is 1.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb7:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb7(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 64, i32 64, i32 64, i1 true)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x i16> with immediates (31, 63, 128) and bound_ctrl on.
; The expected bank mask for 128 is zero and the row mask for 63 is 0xf.
; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb8:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb8(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %dpp = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 31, i32 63, i32 128, i1 true)
  store <2 x i16> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (1, 1, 1) and bound_ctrl off.
; Both kernel arguments are copied into VGPRs and a single DPP move is expected.
; GCN-LABEL: {{^}}dpp_test_v2f16:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test_v2f16(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 1, i32 1, i32 1, i1 false)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with all-zero immediates and bound_ctrl off.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb1:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb1(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 0, i32 0, i32 0, i1 false)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (3, 3, 3) and bound_ctrl off.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb2:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb2(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 3, i32 3, i32 3, i1 false)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with three distinct immediates (1, 2, 3) and
; bound_ctrl on, so each immediate can be told apart in the printed operands.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb3:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb3(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 1, i32 2, i32 3, i1 true)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (4, 3, 2) and bound_ctrl on.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb4:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb4(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 4, i32 3, i32 2, i1 true)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (63, 62, 61) and bound_ctrl on.
; The expected row and bank masks show only the low four bits of 62 and 61.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb5:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb5(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 62, i32 61, i1 true)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (63, 63, 63) and bound_ctrl on.
; Both expected masks come out as 0xf.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb6:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb6(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 63, i32 63, i1 true)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (64, 64, 64) and bound_ctrl on.
; The expected masks are both zero while the last quad_perm entry is 1.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb7:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb7(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 64, i32 64, i32 64, i1 true)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; update.dpp on <2 x half> with immediates (31, 63, 128) and bound_ctrl on.
; The expected bank mask for 128 is zero and the row mask for 63 is 0xf.
; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb8:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb8(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %dpp = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 31, i32 63, i32 128, i1 true)
  store <2 x half> %dpp, ptr addrspace(1) %out
  ret void
}
|
||||
|
||||
; Other AMDGPU intrinsics declared in this file.
declare i32 @llvm.amdgcn.workitem.id.x()
declare void @llvm.amdgcn.s.barrier()

; llvm.amdgcn.update.dpp overloads, one per value type.
; Each takes two values of the overloaded type, three i32 operands and one i1.
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) #0
declare <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16>, <2 x i16>, i32, i32, i32, i1) #0
declare <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half>, <2 x half>, i32, i32, i32, i1) #0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user