[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test. gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores, while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631.
633 lines
32 KiB
LLVM
633 lines
32 KiB
LLVM
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PACKED-GISEL %s
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_vv:
|
|
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
|
|
; Per-workitem: read one <2 x float> element of %a, add it to itself, and
; write the sum back to the same element.
define amdgpu_kernel void @fadd_v2_vv(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, %val
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_vs:
|
|
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Per-workitem: read one <2 x float> element of %a, add the kernel argument
; vector %x to it, and write the sum back to the same element.
define amdgpu_kernel void @fadd_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, %x
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v4_vs:
|
|
; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Four-lane variant: loaded <4 x float> plus the kernel argument vector %x,
; stored back to the element it was loaded from.
define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <4 x float>, ptr addrspace(1) %slot, align 16
  %sum = fadd <4 x float> %val, %x
  store <4 x float> %sum, ptr addrspace(1) %slot, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v32_vs:
|
|
; GFX900-COUNT-32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-COUNT-16: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Thirty-two-lane variant: loaded <32 x float> plus the kernel argument vector
; %x, stored back to the element it was loaded from.
define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <32 x float>, ptr addrspace(1) %slot, align 128
  %sum = fadd <32 x float> %val, %x
  store <32 x float> %sum, ptr addrspace(1) %slot, align 128
  ret void
}
|
|
|
|
; FIXME: GISel does not use op_sel for splat constants.
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_imm:
|
|
; PACKED: s_mov_b32 s[[K:[0-9]+]], 0x42c80000
|
|
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 0x42c80000, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}]{{$}}
|
|
; Adds the constant splat <100.0, 100.0> to the loaded <2 x float> and stores
; the result in place.
define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, <float 100.0, float 100.0>
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_v_splat:
|
|
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v0
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1]{{$}}
|
|
; Adds a splat of the workitem id (bit-reinterpreted as float) to the loaded
; <2 x float> and stores the result in place.
define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fid = bitcast i32 %id to float
  ; The base vector is only a placeholder: both lanes are overwritten below,
  ; so poison is used instead of the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fid, i64 0
  %k = insertelement <2 x float> %tmp1, float %fid, i64 1
  %add = fadd <2 x float> %load, %k
  store <2 x float> %add, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_lit_splat:
|
|
; GFX900-COUNT-2: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 1.0 op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Adds the constant splat <1.0, 1.0> to the loaded <2 x float> and stores the
; result in place.
define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, <float 1.0, float 1.0>
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_lit_hi0:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
|
; PACKED-DAG: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x3f800000
|
|
; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], [[K]]
|
|
; Adds the constant <1.0, 0.0> (lane 1 is zero) to the loaded <2 x float> and
; stores the result in place.
define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, <float 1.0, float 0.0>
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_lit_lo0:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
|
; PACKED-DAG: s_mov_b32 s[[LO:[0-9]+]], 0
|
|
; PACKED-DAG: s_mov_b32 s[[HI:[0-9]+]], 1.0
|
|
; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[LO]]:[[HI]]]{{$}}
|
|
; Adds the constant <0.0, 1.0> (lane 0 is zero) to the loaded <2 x float> and
; stores the result in place.
define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, <float 0.0, float 1.0>
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_unfoldable_lit:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
|
|
; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 1.0
|
|
; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 2.0
|
|
; PACKED: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Adds the non-splat constant <1.0, 2.0> to the loaded <2 x float> and stores
; the result in place.
define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %sum = fadd <2 x float> %val, <float 1.0, float 2.0>
  store <2 x float> %sum, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; FIXME: Fold fneg into v_pk_add_f32 with Global ISel.
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_fneg:
|
|
; GFX900-COUNT-2: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds a splat of the negated scalar argument %x to the loaded <2 x float>.
; The negation is deliberately written as (fsub -0.0, %x).
define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, float %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fneg, i64 0
  %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
  %add = fadd <2 x float> %load, %k
  store <2 x float> %add, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_fneg_lo:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds the vector <-%x, %x> to the loaded <2 x float>: only lane 0 carries the
; negated scalar, lane 1 carries %x unchanged.
define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, float %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fneg, i64 0
  %k = insertelement <2 x float> %tmp1, float %x, i64 1
  %add = fadd <2 x float> %load, %k
  store <2 x float> %add, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_fneg_hi:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_hi:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds the vector <%x, -%x> to the loaded <2 x float>: only lane 1 carries the
; negated scalar, lane 0 carries %x unchanged.
define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, float %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %x, i64 0
  %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
  %add = fadd <2 x float> %load, %k
  store <2 x float> %add, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_fneg_lo2:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] neg_lo:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds the vector <-%x, %y> to the loaded <2 x float>: lane 0 is the negated
; first scalar, lane 1 is the second scalar argument.
define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, float %x, float %y) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fneg, i64 0
  %k = insertelement <2 x float> %tmp1, float %y, i64 1
  %add = fadd <2 x float> %load, %k
  store <2 x float> %add, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_v2_v_fneg_hi2:
|
|
; GFX900-DAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; GFX900-DAG: v_subrev_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_hi:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds the vector <%y, -%x> to the loaded <2 x float>: lane 0 is the second
; scalar argument, lane 1 is the negated first scalar.
define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr addrspace(1) %a, float %x, float %y) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %y, i64 0
  %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
  %add = fadd <2 x float> %load, %k
  store <2 x float> %add, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_vv:
|
|
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
|
|
; Per-workitem: read one <2 x float> element of %a, multiply it by itself, and
; write the product back to the same element.
define amdgpu_kernel void @fmul_v2_vv(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %prod = fmul <2 x float> %val, %val
  store <2 x float> %prod, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_vs:
|
|
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Per-workitem: read one <2 x float> element of %a, multiply it by the kernel
; argument vector %x, and write the product back to the same element.
define amdgpu_kernel void @fmul_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %prod = fmul <2 x float> %val, %x
  store <2 x float> %prod, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v4_vs:
|
|
; GFX900-COUNT-4: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-COUNT-2: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Four-lane variant: loaded <4 x float> times the kernel argument vector %x,
; stored back to the element it was loaded from.
define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <4 x float>, ptr addrspace(1) %slot, align 16
  %prod = fmul <4 x float> %val, %x
  store <4 x float> %prod, ptr addrspace(1) %slot, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v32_vs:
|
|
; GFX900-COUNT-32: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-COUNT-16: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Thirty-two-lane variant: loaded <32 x float> times the kernel argument
; vector %x, stored back to the element it was loaded from.
define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <32 x float>, ptr addrspace(1) %slot, align 128
  %prod = fmul <32 x float> %val, %x
  store <32 x float> %prod, ptr addrspace(1) %slot, align 128
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_v_imm:
|
|
; PACKED: s_mov_b32 s[[K:[0-9]+]], 0x42c80000
|
|
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 0x42c80000, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K]]:{{[0-9:]+}}]{{$}}
|
|
; Multiplies the loaded <2 x float> by the constant splat <100.0, 100.0> and
; stores the result in place.
define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %prod = fmul <2 x float> %val, <float 100.0, float 100.0>
  store <2 x float> %prod, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_v_v_splat:
|
|
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v0
|
|
; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1] op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1]{{$}}
|
|
; Multiplies the loaded <2 x float> by a splat of the workitem id
; (bit-reinterpreted as float) and stores the result in place.
define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fid = bitcast i32 %id to float
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fid, i64 0
  %k = insertelement <2 x float> %tmp1, float %fid, i64 1
  %mul = fmul <2 x float> %load, %k
  store <2 x float> %mul, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_v_lit_splat:
|
|
; GFX900-COUNT-2: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0 op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Multiplies the loaded <2 x float> by the constant splat <4.0, 4.0> and
; stores the result in place.
define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %prod = fmul <2 x float> %val, <float 4.0, float 4.0>
  store <2 x float> %prod, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_v_unfoldable_lit:
|
|
; GFX900-DAG: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
|
|
; GFX900-DAG: v_mul_f32_e32 v{{[0-9]+}}, 0x40400000, v{{[0-9]+}}
|
|
; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 4.0
|
|
; PACKED-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000
|
|
; PACKED: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Multiplies the loaded <2 x float> by the non-splat constant <4.0, 3.0> and
; stores the result in place.
define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %prod = fmul <2 x float> %val, <float 4.0, float 3.0>
  store <2 x float> %prod, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fmul_v2_v_fneg:
|
|
; GFX900-COUNT-2: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -s{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Multiplies the loaded <2 x float> by a splat of the negated scalar argument
; %x. The negation is deliberately written as (fsub -0.0, %x).
define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, float %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fneg, i64 0
  %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
  %mul = fmul <2 x float> %load, %k
  store <2 x float> %mul, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_vv:
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
|
|
; Per-workitem: read one <2 x float> element of %a and feed it to llvm.fma as
; all three operands, then write the result back to the same element.
define amdgpu_kernel void @fma_v2_vv(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %val, <2 x float> %val, <2 x float> %val)
  store <2 x float> %res, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_vs:
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
|
|
; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; llvm.fma with the loaded <2 x float> as first operand and the kernel
; argument vector %x as both the second and third operands.
define amdgpu_kernel void @fma_v2_vs(ptr addrspace(1) %a, <2 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %val, <2 x float> %x, <2 x float> %x)
  store <2 x float> %res, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v4_vs:
|
|
; GFX900-COUNT-4: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
|
|
; PACKED-COUNT-2: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Four-lane variant: llvm.fma of the loaded <4 x float> with the kernel
; argument vector %x as both the second and third operands.
define amdgpu_kernel void @fma_v4_vs(ptr addrspace(1) %a, <4 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <4 x float>, ptr addrspace(1) %slot, align 16
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %val, <4 x float> %x, <4 x float> %x)
  store <4 x float> %res, ptr addrspace(1) %slot, align 16
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v32_vs:
|
|
; GFX900-COUNT-32: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
|
|
; PACKED-COUNT-16: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Thirty-two-lane variant: llvm.fma of the loaded <32 x float> with the kernel
; argument vector %x as both the second and third operands.
define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <32 x float>, ptr addrspace(1) %slot, align 128
  %res = tail call <32 x float> @llvm.fma.v32f32(<32 x float> %val, <32 x float> %x, <32 x float> %x)
  store <32 x float> %res, ptr addrspace(1) %slot, align 128
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_v_imm:
|
|
; GCN-DAG: s_mov_b32 s[[K1:[0-9]+]], 0x42c80000
|
|
; GFX900-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000
|
|
; PACKED-SDAG-DAG: v_mov_b32_e32 v[[K2:[0-9]+]], 0x43480000
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s[[K1]], v[[K2]]
|
|
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[[[K2]]:{{[0-9:]+}}] op_sel_hi:[1,0,0]{{$}}
|
|
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[[[K1]]:{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; llvm.fma of the loaded <2 x float> with two constant splats: multiplier
; <100.0, 100.0> and addend <200.0, 200.0>.
define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %val, <2 x float> <float 100.0, float 100.0>, <2 x float> <float 200.0, float 200.0>)
  store <2 x float> %res, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_v_v_splat:
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v0, v0
|
|
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1] op_sel_hi:[1,0,0]{{$}}
|
|
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[0:1], v[0:1]{{$}}
|
|
; llvm.fma of the loaded <2 x float> with a splat of the workitem id
; (bit-reinterpreted as float) used as both the second and third operands.
define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fid = bitcast i32 %id to float
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fid, i64 0
  %k = insertelement <2 x float> %tmp1, float %fid, i64 1
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> %k, <2 x float> %k)
  store <2 x float> %fma, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_v_lit_splat:
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0
|
|
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4.0, 1.0 op_sel_hi:[1,0,0]{{$}}
|
|
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; llvm.fma of the loaded <2 x float> with two constant splats: multiplier
; <4.0, 4.0> and addend <1.0, 1.0>.
define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %val, <2 x float> <float 4.0, float 4.0>, <2 x float> <float 1.0, float 1.0>)
  store <2 x float> %res, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_v_unfoldable_lit:
|
|
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x40400000
|
|
; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4.0, 1.0
|
|
; GFX900-DAG: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, 2.0
|
|
; PACKED-SDAG-DAG: s_mov_b32 s{{[0-9]+}}, 4.0
|
|
; PACKED-SDAG-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
|
|
; PACKED-SDAG-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
|
|
; PACKED: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; llvm.fma of the loaded <2 x float> with two non-splat constants: multiplier
; <4.0, 3.0> and addend <1.0, 2.0>.
define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %val, <2 x float> <float 4.0, float 3.0>, <2 x float> <float 1.0, float 2.0>)
  store <2 x float> %res, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_v2_v_fneg:
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] op_sel_hi:[1,0,0] neg_lo:[0,1,1] neg_hi:[0,1,1]{{$}}
|
|
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; llvm.fma of the loaded <2 x float> with a splat of the negated scalar
; argument %x used as both the second and third operands. The negation is
; deliberately written as (fsub -0.0, %x).
define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, float %x) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep, align 8
  %fneg = fsub float -0.0, %x
  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %tmp1 = insertelement <2 x float> poison, float %fneg, i64 0
  %k = insertelement <2 x float> %tmp1, float %fneg, i64 1
  %fma = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %load, <2 x float> %k, <2 x float> %k)
  store <2 x float> %fma, ptr addrspace(1) %gep, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}add_vector_neg_bitcast_scalar_lo:
|
|
; GFX900-COUNT-2: v_sub_f32_e32
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Loads a <2 x float> from %lds and a float from %arg2 (both volatile, address
; space 3), negates the scalar via (fsub -0.0, x), broadcasts it to both lanes
; and adds it to the vector. The sum is stored to %out.
define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) {
bb:
  %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 4
  %scalar0 = load volatile float, ptr addrspace(3) %arg2, align 4
  %neg.scalar0 = fsub float -0.0, %scalar0

  ; Only lane 0 of the first shuffle operand is selected by the all-zero mask,
  ; so the remaining placeholder operands use poison instead of the deprecated
  ; undef.
  %neg.scalar0.vec = insertelement <2 x float> poison, float %neg.scalar0, i32 0
  %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> poison, <2 x i32> zeroinitializer

  %result = fadd <2 x float> %vec0, %neg.scalar0.broadcast
  store <2 x float> %result, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo_scalar_hi:
|
|
; GFX900-COUNT-2: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_fma_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; llvm.fma of two <2 x float> vectors loaded from %lds (elements 0 and 1) with
; a negated addend assembled from two separately loaded scalars (%arg2 at
; float offsets 0 and 2). All loads are volatile; the result goes to %out.
define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(3) %arg2) {
bb:
  %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
  %arg2.gep = getelementptr inbounds float, ptr addrspace(3) %arg2, i32 2

  %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 4
  %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 4

  %scalar0 = load volatile float, ptr addrspace(3) %arg2, align 4
  %scalar1 = load volatile float, ptr addrspace(3) %arg2.gep, align 4

  ; Placeholder base vector: both lanes are overwritten, so poison replaces
  ; the deprecated undef.
  %vec.ins0 = insertelement <2 x float> poison, float %scalar0, i32 0
  %vec2 = insertelement <2 x float> %vec.ins0, float %scalar1, i32 1
  ; Whole-vector negation written as (fsub <-0.0, -0.0>, x).
  %neg.vec2 = fsub <2 x float> <float -0.0, float -0.0>, %vec2

  %result = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %vec0, <2 x float> %vec1, <2 x float> %neg.vec2)
  store <2 x float> %result, ptr addrspace(1) %out, align 4
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}shuffle_add_f32:
|
|
; GFX900-COUNT-2: v_add_f32_e32
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds the first <2 x float> in %lds to the lane-swapped second <2 x float>
; (mask <1, 0>) and stores the sum to %out. Both loads are volatile.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm whether it is intentional.
define amdgpu_kernel void @shuffle_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
bb:
  %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
  %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
  %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
  ; The mask only selects lanes of %vec1, so the unused second operand is
  ; poison rather than the deprecated undef.
  %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %result = fadd <2 x float> %vec0, %vec1.swap
  store <2 x float> %result, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}shuffle_neg_add_f32:
|
|
; GFX900-COUNT-2: v_sub_f32_e32
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds the first <2 x float> in %lds to the negated, lane-swapped second
; <2 x float> (mask <1, 0>) and stores the sum to %out.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm whether it is intentional.
define amdgpu_kernel void @shuffle_neg_add_f32(ptr addrspace(1) %out, ptr addrspace(3) %lds) #0 {
bb:
  %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
  %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
  ; Result is unused; the volatile load sits between the two vector loads.
  ; NOTE(review): the undef pointer is kept as-is because changing the address
  ; operand of a volatile load may alter the emitted code - confirm before
  ; converting it.
  %f32 = load volatile float, ptr addrspace(3) undef, align 8
  %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
  %vec1.neg = fsub <2 x float> <float -0.0, float -0.0>, %vec1
  ; The mask only selects lanes of %vec1.neg, so the unused second operand is
  ; poison rather than the deprecated undef.
  %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %result = fadd <2 x float> %vec0, %vec1.neg.swap
  store <2 x float> %result, ptr addrspace(1) %out, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_fadd_fsub_0:
|
|
; GFX900: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
|
; GFX900: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
|
|
|
; PACKED-SDAG: v_add_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 0
|
|
; PACKED-SDAG: v_add_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
|
|
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
|
|
; Chain of two vector fadds with zero and a final fsub of zero, glued together
; by shuffles: lane 1 of the first sum is moved to lane 0 for the second add,
; then lane 0 of %arg is paired with lane 0 of the second sum.
; The store target is an undef pointer, exactly as in the original test.
define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
bb:
  %sum0 = fadd <2 x float> zeroinitializer, %arg
  %hi.to.lo = shufflevector <2 x float> %sum0, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %sum1 = fadd <2 x float> zeroinitializer, %hi.to.lo
  %mix = shufflevector <2 x float> %arg, <2 x float> %sum1, <2 x i32> <i32 0, i32 2>
  %res = fsub <2 x float> %mix, zeroinitializer
  store <2 x float> %res, ptr undef
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_fadd_fsub:
|
|
; GFX900: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; GFX900: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
|
|
; PACKED-SDAG: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
|
|
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; PACKED-GISEL: v_pk_add_f32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Same shuffle/add/sub chain as the zero-operand variant, but with the kernel
; argument %arg1 as the other operand of each arithmetic op; the result is
; stored through %ptr.
define amdgpu_kernel void @fadd_fadd_fsub(<2 x float> %arg, <2 x float> %arg1, ptr addrspace(1) %ptr) {
bb:
  %sum0 = fadd <2 x float> %arg, %arg1
  %hi.to.lo = shufflevector <2 x float> %sum0, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %sum1 = fadd <2 x float> %arg1, %hi.to.lo
  %mix = shufflevector <2 x float> %arg, <2 x float> %sum1, <2 x i32> <i32 0, i32 2>
  %res = fsub <2 x float> %mix, %arg1
  store <2 x float> %res, ptr addrspace(1) %ptr
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fadd_shuffle_v4:
|
|
; GFX900-COUNT-4: v_add_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
|
; PACKED-SDAG-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}] op_sel_hi:[1,0]{{$}}
|
|
; PACKED-GISEL-COUNT-2: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]{{$}}
|
|
; Adds a broadcast of lane 0 of the loaded <4 x float> to the vector itself
; and stores the sum back to the same element.
define amdgpu_kernel void @fadd_shuffle_v4(ptr addrspace(1) %arg) {
bb:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
  %in.1 = load <4 x float>, ptr addrspace(1) %gep
  ; The all-zero mask only selects lane 0 of %in.1, so the unused second
  ; operand is poison rather than the deprecated undef.
  %shuf = shufflevector <4 x float> %in.1, <4 x float> poison, <4 x i32> zeroinitializer
  %add.1 = fadd <4 x float> %in.1, %shuf
  store <4 x float> %add.1, ptr addrspace(1) %gep
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fneg_v2f32_vec:
|
|
; GFX900-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
|
; PACKED-SDAG: v_pk_add_f32 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 0 neg_lo:[1,1] neg_hi:[1,1]{{$}}
|
|
; PACKED-GISEL-COUNT-2: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
|
|
; PACKED-GISEL: v_pk_mul_f32 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}] op_sel_hi:[0,1]{{$}}
|
|
; Negates a loaded <2 x float> using the (fsub <-0.0, -0.0>, x) form and
; stores the result back to the element it was loaded from.
define amdgpu_kernel void @fneg_v2f32_vec(ptr addrspace(1) %a) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %slot = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %slot, align 8
  %neg = fsub <2 x float> <float -0.0, float -0.0>, %val
  store <2 x float> %neg, ptr addrspace(1) %slot, align 8
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}fneg_v2f32_scalar:
|
|
; GCN-COUNT-2: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
|
|
; Negates the kernel argument vector %x using the (fsub <-0.0, -0.0>, x) form
; and stores it directly at %a (no per-workitem indexing).
define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x) {
  %neg = fsub <2 x float> <float -0.0, float -0.0>, %x
  store <2 x float> %neg, ptr addrspace(1) %a, align 8
  ret void
}
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
|
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
|
declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>)
|