This isn't always folded to fneg for a freestanding fsub, depending on the denormal mode. When matching source modifiers, we're implicitly canonicalizing the input, so we can fold it here.

This doesn't bother handling the VOP3P case, since it's only relevant with DAZ, which nobody really uses with f16.

For f64, the tests show an existing bug where DAGCombiner tries to respect the denormal mode for fsub -0, x, but not after it's lowered to fadd -0, (fneg x). Either the fold is wrong, or we shouldn't restrict the fsub case based on the denormal mode.

https://reviews.llvm.org/D155652
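A minimal sketch (not part of the patch itself) of the fold these tests exercise, assuming no signed zeros: negating the result of an fadd is rewritten as a subtract with a negated operand, so

  %add = fadd float %a, %b
  %neg = fneg float %add

becomes, in effect,

  %neg.a = fneg float %a
  %neg   = fsub float %neg.a, %b

(the v_sub_f32 -a, b in the GCN-NSZ checks below). The two forms agree except for signed zeros: when %a == -%b the first form yields -0.0 and the second +0.0, which is why the GCN-SAFE runs instead keep the add and flip the sign bit with v_xor_b32.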
; RUN: llc -march=amdgcn -mcpu=hawaii -start-before=amdgpu-unify-divergent-exit-nodes -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-before=amdgpu-unify-divergent-exit-nodes < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -start-before=amdgpu-unify-divergent-exit-nodes < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-before=amdgpu-unify-divergent-exit-nodes < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI %s

; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %add, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.b = fneg float %b
  %add = fadd float %a, %fneg.b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; This one asserted with -enable-no-signed-zeros-fp-math
; GCN-LABEL: {{^}}fneg_fadd_0:
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]

; GCN-NSZ-DAG: v_mul_f32_e32 v{{[0-9]+}}, 0, v
; GCN-NSZ: v_cmp_ngt_f32
; GCN-NSZ: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
.entry:
  %tmp7 = fdiv float 1.000000e+00, %tmp6
  %tmp8 = fmul float 0.000000e+00, %tmp7
  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
  %.i188 = fadd float %tmp9, 0.000000e+00
  %tmp10 = fcmp uge float %.i188, %tmp2
  %tmp11 = fneg float %.i188
  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
  %tmp12 = fcmp ule float %.i092, 0.000000e+00
  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
  ret float %.i198
}

; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
; the unsafe-fp-math function attribute automatically. Combine this with the
; previous test when that is done.
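; For reference, a sketch of how the attribute-based variant is wired up; the
; exact attribute group is an assumption (it is what the comment above alludes
; to), not quoted from this file:
;
;   attributes #2 = { nounwind "unsafe-fp-math"="true" }
;
; The @fneg_fadd_0_nsz test below is tagged #2, while the flag-based runs get
; the same effect from -enable-no-signed-zeros-fp-math.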
; GCN-LABEL: {{^}}fneg_fadd_0_nsz:
; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fc00000
; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]], 0, [[A]]
; GCN-NSZ-DAG: v_cmp_ngt_f32_e32 {{.*}}, s{{[0-9]+}}, [[D]]
; GCN-NSZ-DAG: v_cndmask_b32_e64 [[E:v[0-9]+]], -[[D]], v{{[0-9]+}},
; GCN-NSZ-DAG: v_cmp_nlt_f32_e32 {{.*}}, 0
; GCN-NSZ-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, [[C]], 0,
define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
.entry:
  %tmp7 = fdiv afn float 1.000000e+00, %tmp6
  %tmp8 = fmul float 0.000000e+00, %tmp7
  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
  %.i188 = fadd float %tmp9, 0.000000e+00
  %tmp10 = fcmp uge float %.i188, %tmp2
  %tmp11 = fneg float %.i188
  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
  %tmp12 = fcmp ule float %.i092, 0.000000e+00
  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
  ret float %.i198
}

; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %mul, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.b = fneg float %b
  %mul = fmul float %a, %fneg.b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fneg float %min
  store float %min.fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fneg float %min
  ret float %min.fneg
}

; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-NOT: [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MIN]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call nnan float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]

; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]

; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
  %fneg = fneg float %min
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]

; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]

; VI: v_min_f64 v[[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]], [[A]], 0.15915494
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
  %fneg = fsub double -0.000000e+00, %min
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]

; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s[[[K_LO]]:[[K_HI]]]

; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
  %fneg = fsub double -0.000000e+00, %min
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  %mul = fmul float %fneg, %b
  store float %mul, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]

; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fneg float %min
  %mul = fmul float %fneg, %b
  store float %mul, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  %mul = fmul float %fneg, %b
  ret float %mul
}

; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  %use1 = fmul float %min, 4.0
  %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
  ret <2 x float> %ins1
}

; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %max = call float @llvm.maxnum.f32(float %a, float %a)
  %max.fneg = fneg float %max
  store float %max.fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float %a, float %a)
  %max.fneg = fneg float %max
  ret float %max.fneg
}

; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-NOT: [[A]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %max = call nnan float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fneg float %max
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fneg float %max
  %mul = fmul float %fneg, %b
  store float %mul, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fneg float %max
  %mul = fmul float %fneg, %b
  ret float %mul
}

; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  %use1 = fmul float %max, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  %use1 = fmul float %max, 4.0
  %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
  ret <2 x float> %ins1
}

; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------
; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]

; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fma, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]

; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.b = fneg float %b
  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fneg.c = fneg float %c
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.c = fneg float %c
  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_xor_b32
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
; GCN-SAFE: v_xor_b32

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]

; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_fma_f32 [[FMA:v[0-9]+]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------------------
; fmad tests
; --------------------------------------------------------------------------------

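; Same fold as for fma, but through llvm.fmuladd, which selects to
; v_mac/v_mad on these targets.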
; GCN-LABEL: {{^}}v_fneg_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]

; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_v4f32:

; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fmad_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile <4 x float>, ptr addrspace(1) %a.gep
  %b = load volatile <4 x float>, ptr addrspace(1) %b.gep
  %c = load volatile <4 x float>, ptr addrspace(1) %c.gep
  %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  %fneg = fneg <4 x float> %fma
  store <4 x float> %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]

; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %c = load volatile float, ptr addrspace(1) %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------------------
; fp_extend tests
; --------------------------------------------------------------------------------

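; The conversion instructions accept source modifiers, so a standalone fneg
; of the fpext result folds into v_cvt_f64_f32, and fneg(fpext(fneg x))
; cancels entirely.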
; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, ptr addrspace(1) %out.gep
  store volatile float %fneg.a, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[CVT_LO]]:[[CVT_HI]]]
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, ptr addrspace(1) %out.gep
  store volatile double %fpext, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v[[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]], [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v[[[CVT_LO]]:[[CVT_HI]]], 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  %mul = fmul double %fpext, 4.0
  store volatile double %fneg, ptr addrspace(1) %out.gep
  store volatile double %mul, ptr addrspace(1) %out.gep
  ret void
}

; FIXME: Source modifiers not folded for f16->f32
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %fpext = fpext half %a to float
  %fneg = fneg float %fpext
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %fpext, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile half, ptr addrspace(1) %a.gep
  %fpext = fpext half %a to float
  %fneg = fneg float %fpext
  %mul = fmul float %fpext, 4.0
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %mul, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fp_round tests
; --------------------------------------------------------------------------------

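; As with fp_extend, the fneg folds into the v_cvt source modifier, and a
; negated source cancels the negated result.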
; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fneg float %fpround
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 v[[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v[[[A_LO]]:[[A_HI]]]
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[A_LO]]:[[NEG_A_HI]]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile double %fneg.a, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s[

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, double %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  %use1 = fmul double %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile double %use1, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fpround = fptrunc float %a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile double, ptr addrspace(1) %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fneg float %fpround
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %fpround, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store volatile half %fneg, ptr addrspace(1) %out.gep
  store volatile float %fneg.a, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  %use1 = fmul float %fneg.a, %c
  store volatile half %fneg, ptr addrspace(1) %out.gep
  store volatile float %use1, ptr addrspace(1) undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------

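; llvm.amdgcn.rcp selects to v_rcp_f32, which takes source modifiers, so the
; fneg can always be folded into the input.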
; GCN-LABEL: {{^}}v_fneg_rcp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
  %fneg = fneg float %rcp
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %fneg.a, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, ptr addrspace(1) %out.gep
  store volatile float %use1, ptr addrspace(1) undef
  ret void
}

; --------------------------------------------------------------------------------
; fmul_legacy tests
; --------------------------------------------------------------------------------

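; Negating a multiply does not depend on signed zeros, so the fneg folds into
; one multiplicand even without nsz.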
; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %mul, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %fneg.a, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  store volatile float %fneg, ptr addrspace(1) %out
  store volatile float %use1, ptr addrspace(1) %out
  ret void
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

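; llvm.sin is lowered with a multiply by 1/(2*pi) before v_sin, so the fneg
; folds into that multiply's constant (0xbe22f983 is -1/(2*pi)); the
; amdgcn.sin intrinsic takes the source modifier directly.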
; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fneg float %sin
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fneg float %sin
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_trunc_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fneg float %trunc
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------

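; round is expanded inline (trunc plus a conditional increment), so only the
; final add can absorb the fneg, and only when signed zeros are ignored.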
; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_round_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %round = call float @llvm.round.f32(float %a)
  %fneg = fneg float %round
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fneg float %rint
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_nearbyint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fneg float %nearbyint
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_canonicalize_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %trunc = call float @llvm.canonicalize.f32(float %a)
  %fneg = fneg float %trunc
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

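; The interp instructions do not accept source modifiers, so the fneg is
; instead folded backward into the multiply that feeds them.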
; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p1_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, ptr addrspace(1) %out.gep
  store volatile float %intrp1, ptr addrspace(1) %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %a = load volatile float, ptr addrspace(1) %a.gep
  %b = load volatile float, ptr addrspace(1) %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, ptr addrspace(1) %out.gep
  store volatile float %intrp1, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------

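; The fneg result is live across a CopyToReg into another block, so it cannot
; be folded into its user there; the xor is materialized in the block that
; consumes it.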
; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
|
|
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
|
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
|
|
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
|
|
; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
|
|
; GCN: s_cbranch_scc0
|
|
|
|
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
|
|
; GCN: s_endpgm
|
|
|
|
; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
|
|
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
|
|
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
|
|
|
|
define amdgpu_kernel void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%tid.ext = sext i32 %tid to i64
|
|
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
|
|
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
|
|
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
|
|
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
|
|
%a = load volatile float, ptr addrspace(1) %a.gep
|
|
%b = load volatile float, ptr addrspace(1) %b.gep
|
|
%c = load volatile float, ptr addrspace(1) %c.gep
|
|
%mul = fmul float %a, %b
|
|
%fneg = fneg float %mul
|
|
%cmp0 = icmp eq i32 %d, 0
|
|
br i1 %cmp0, label %if, label %endif
|
|
|
|
if:
|
|
%mul1 = fmul float %fneg, %c
|
|
store volatile float %mul1, ptr addrspace(1) %out.gep
|
|
br label %endif
|
|
|
|
endif:
|
|
store volatile float %mul, ptr addrspace(1) %out.gep
|
|
ret void
|
|
}
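
; Note: %mul is live across the CopyToReg for the branch, so the fneg
; is not folded into the defining fmul; the sign flip is materialized
; as a v_xor_b32 only on the path that consumes it.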

; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------

; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
call void asm sideeffect "; use $0", "v"(float %fneg) #0
store volatile float %fneg, ptr addrspace(1) %out.gep
ret void
}
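
; Note: here both uses of the fneg (the asm input and the store) want
; the negated value, so folding the negation into the defining fmul as
; a source modifier costs nothing extra.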
; Can't fold into the use, and the source has another non-negated
; user, so the fneg must be materialized with a v_xor_b32.
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep
%mul = fmul float %a, %b
%fneg = fneg float %mul
call void asm sideeffect "; use $0", "v"(float %fneg) #0
store volatile float %mul, ptr addrspace(1) %out.gep
ret void
}

; --------------------------------------------------------------------------------
; code size regression tests
; --------------------------------------------------------------------------------

; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep

%fneg.a = fneg float %a
%fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)

store volatile float %fma0, ptr addrspace(1) %out
store volatile float %fma1, ptr addrspace(1) %out
ret void
}
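
; Note: both fma users are VOP3 instructions, so the -[[A]] source
; modifier is encoded for free and no v_xor_b32 is needed.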

; There are multiple users, but both require using a larger encoding
; for the modifier.

; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep

%fneg.a = fneg float %a
%mul0 = fmul float %fneg.a, %b
%mul1 = fmul float %fneg.a, %c

store volatile float %mul0, ptr addrspace(1) %out
store volatile float %mul1, ptr addrspace(1) %out
ret void
}
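
; Note: each v_mul_f32 must switch from VOP2 to the larger e64
; encoding to carry the modifier, which is still chosen over
; materializing the fneg with a separate v_xor_b32.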

; One user is VOP3 so has no cost to folding the modifier, the other does.
; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep

%fneg.a = fneg float %a
%fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
%mul1 = fmul float %fneg.a, %c

store volatile float %fma0, ptr addrspace(1) %out
store volatile float %mul1, ptr addrspace(1) %out
ret void
}
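
; Note: the fma absorbs -[[A]] for free while the remaining VOP2 mul
; grows to the e64 encoding, combining the two cases above.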

; The use of the fneg requires a code size increase, but folding into
; the source does not

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]

; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep
%d = load volatile float, ptr addrspace(1) %d.gep

%fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
%fneg.fma0 = fneg float %fma0
%mul1 = fmul float %fneg.fma0, %c
%mul2 = fmul float %fneg.fma0, %d

store volatile float %mul1, ptr addrspace(1) %out
store volatile float %mul2, ptr addrspace(1) %out
ret void
}
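
; Note: in the safe mode the fneg stays on the fma result and forces
; e64 encodings on both muls; with nsz it is instead pushed into the
; fma operands (-[[B]], -2.0), so both muls keep the short VOP2 form.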

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]

; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds double, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds double, ptr addrspace(1) %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds double, ptr addrspace(1) %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile double, ptr addrspace(1) %a.gep
%b = load volatile double, ptr addrspace(1) %b.gep
%c = load volatile double, ptr addrspace(1) %c.gep
%d = load volatile double, ptr addrspace(1) %d.gep

%fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
%fneg.fma0 = fsub double -0.0, %fma0
%mul1 = fmul double %fneg.fma0, %c
%mul2 = fmul double %fneg.fma0, %d

store volatile double %mul1, ptr addrspace(1) %out
store volatile double %mul2, ptr addrspace(1) %out
ret void
}
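
; Note: the f64 variant spells the negation with the legacy
; 'fsub double -0.0, %fma0' idiom; the equivalent instruction form
; would be '%fneg.fma0 = fneg double %fma0'.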

; %trunc.a has one fneg use, but it requires a code size increase and
; the fneg can instead be folded for free into the fma.

; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep
%d = load volatile float, ptr addrspace(1) %d.gep

%trunc.a = call float @llvm.trunc.f32(float %a)
%trunc.fneg.a = fneg float %trunc.a
%fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
store volatile float %fma0, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
%c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
%d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%b = load volatile float, ptr addrspace(1) %b.gep
%c = load volatile float, ptr addrspace(1) %c.gep
%d = load volatile float, ptr addrspace(1) %d.gep

%trunc.a = call float @llvm.trunc.f32(float %a)
%trunc.fneg.a = fneg float %trunc.a
%fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
%mul1 = fmul float %trunc.a, %d
store volatile float %fma0, ptr addrspace(1) %out
store volatile float %mul1, ptr addrspace(1) %out
ret void
}
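
; Note: here %trunc.a also has a non-negated user (%mul1), so only the
; fma operand is negated (-[[TRUNC_A]]) while the mul consumes the
; plain truncation result.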

; The AMDGPU combine to pull fneg into the FMA operands was being
; undone by the generic combine to pull the fneg out of the fma if
; !isFNegFree. We were reporting false for v2f32 even though it will
; be split into f32 where it will be free.
; GCN-LABEL: {{^}}fneg_fma_fneg_dagcombine_loop:
; GCN: s_brev_b32 [[NEGZERO:s[0-9]+]], 1{{$}}
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], v2, -v4, [[NEGZERO]]
; GCN-DAG: v_fma_f32 [[FMA1:v[0-9]+]], v3, -v5, [[NEGZERO]]
; GCN-DAG: v_sub_f32_e32 [[SUB0:v[0-9]+]], [[FMA0]], v0
; GCN-DAG: v_sub_f32_e32 [[SUB1:v[0-9]+]], [[FMA1]], v1
; GCN-DAG: v_mul_f32_e32 v0, [[SUB0]], v4
; GCN-DAG: v_mul_f32_e32 v1, [[SUB1]], v5
; GCN: s_setpc_b64
define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
bb:
%i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
%i4 = fadd fast <2 x float> %i3, %arg
%i5 = fneg <2 x float> %i4
%i6 = fmul fast <2 x float> %i5, %arg2
ret <2 x float> %i6
}
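
; Note: the checks rely on the <2 x float> fma being split into scalar
; v_fma_f32s where the operand negation is a free source modifier,
; rather than the two combines undoing each other in a loop.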

; This expects denormal flushing, so the fmul by -1.0 can't be turned
; into a bare fneg; the negation folds as a source modifier instead.
; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
; GCN: s_waitcnt
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
%mul = fmul float %x, -1.0
%add = fmul nnan float %mul, %y
ret float %add
}

; It's legal to turn this fmul into an fneg since denormals are
; preserved and we know an snan can't happen from the flag.
; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: s_setpc_b64
define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
%mul = fmul nnan float %x, -1.0
%add = fmul float %mul, %y
ret float %add
}

; The source is the result of an fmul, so we know it can't be an snan.
; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:
; GCN: s_waitcnt
; GCN-NEXT: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0
; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1
; GCN-NEXT: s_setpc_b64
define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
%canonical = fmul float %x, %x
%mul = fmul float %canonical, -1.0
%add = fmul float %mul, %y
ret float %add
}

; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:
; GCN: s_waitcnt
; GCN-NEXT: v_mul_f32_e32 [[TMP:v[0-9]+]], 1.0, v0
; GCN-NEXT: v_mul_f32_e64 v0, -[[TMP]], v1
define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
%quiet = call float @llvm.canonicalize.f32(float %x)
%mul = fmul float %quiet, -1.0
%add = fmul float %mul, %y
ret float %add
}
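
; Note: with denormal flushing, the canonicalize lowers to a multiply
; by 1.0 ([[TMP]]), and the fneg then folds into the second mul as a
; source modifier.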

; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f32:
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GCN-NEXT: v_sub_f32_e32 v0, v3, v0
; GCN-NEXT: s_setpc_b64
define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
%cmp = icmp eq i32 %arg0, 0
%neg.x = fneg float %x
%neg.y = fneg float %y
%select = select i1 %cmp, float %neg.x, float %neg.y
%add = fadd float %select, %z
ret float %add
}
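
; Note: the two fnegs are hoisted out of the select and then folded
; into the fadd user, which becomes v_sub_f32 v3, v0.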

; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f64:
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GCN-NEXT: v_add_f64 v[0:1], v[5:6], -v[1:2]
; GCN-NEXT: s_setpc_b64
define double @fadd_select_fneg_fneg_f64(i32 %arg0, double %x, double %y, double %z) {
%cmp = icmp eq i32 %arg0, 0
%neg.x = fneg double %x
%neg.y = fneg double %y
%select = select i1 %cmp, double %neg.x, double %neg.y
%add = fadd double %select, %z
ret double %add
}

; GCN-LABEL: {{^}}fadd_select_fneg_fneg_f16:
; SI: v_cvt_f16_f32
; SI: v_cvt_f16_f32
; SI: v_cvt_f16_f32
; SI: v_cmp_eq_u32
; SI: v_cvt_f32_f16
; SI: v_cvt_f32_f16
; SI: v_cvt_f32_f16
; SI: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; SI-NEXT: v_sub_f32_e32
; SI-NEXT: s_setpc_b64

; VI: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NEXT: v_sub_f16_e32 v0, v3, v0
; VI-NEXT: s_setpc_b64
define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
%cmp = icmp eq i32 %arg0, 0
%neg.x = fneg half %x
%neg.y = fneg half %y
%select = select i1 %cmp, half %neg.x, half %neg.y
%add = fadd half %select, %z
ret half %add
}

; FIXME: Terrible code for SI
; GCN-LABEL: {{^}}fadd_select_fneg_fneg_v2f16:
; SI: v_cvt_f16_f32
; SI: v_cvt_f16_f32
; SI: v_cvt_f16_f32
; SI: v_cvt_f16_f32
; SI: v_cmp_eq_u32
; SI: v_lshlrev_b32_e32
; SI: v_or_b32_e32
; SI: v_cndmask_b32
; SI: v_lshrrev_b32
; SI: v_cvt_f32_f16
; SI: v_cvt_f32_f16
; SI: v_cvt_f32_f16
; SI: v_cvt_f32_f16
; SI: v_sub_f32
; SI: v_sub_f32

; VI: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NEXT: v_sub_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT: v_sub_f16_e32 v0, v3, v0
; VI-NEXT: v_or_b32_e32 v0, v0, v1
define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
%cmp = icmp eq i32 %arg0, 0
%neg.x = fneg <2 x half> %x
%neg.y = fneg <2 x half> %y
%select = select i1 %cmp, <2 x half> %neg.x, <2 x half> %neg.y
%add = fadd <2 x half> %select, %z
ret <2 x half> %add
}

; FIXME: This fneg should fold into select
; GCN-LABEL: {{^}}v_fneg_select_f32:
; GCN: s_waitcnt
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64
define float @v_fneg_select_f32(i32 %arg0, float %a, float %b, float %c) {
%cond = icmp eq i32 %arg0, 0
%select = select i1 %cond, float %a, float %b
%fneg = fneg float %select
ret float %fneg
}

; FIXME: This fneg should fold into select
; GCN-LABEL: {{^}}v_fneg_select_2_f32:
; GCN: s_waitcnt
; GCN-NSZ-NEXT: v_add_f32_e32 [[ADD2:v[0-9]+]], 2.0, v1
; GCN-NSZ-NEXT: v_add_f32_e32 [[ADD4:v[0-9]+]], 4.0, v2
; GCN-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NSZ-NEXT: v_cndmask_b32_e32 v0, [[ADD4]], [[ADD2]], vcc
; GCN-NSZ-NEXT: v_xor_b32_e32 v0, 0x80000000, v0

; GCN-SAFE-NEXT: v_add_f32_e32 [[ADD2:v[0-9]+]], 2.0, v1
; GCN-SAFE-NEXT: v_add_f32_e32 [[ADD4:v[0-9]+]], 4.0, v2
; GCN-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-SAFE-NEXT: v_cndmask_b32_e32 v0, [[ADD4]], [[ADD2]], vcc
; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0

; GCN-NEXT: s_setpc_b64
define float @v_fneg_select_2_f32(i32 %arg0, float %a, float %b, float %c) {
%cond = icmp eq i32 %arg0, 0
%add.0 = fadd float %a, 2.0
%add.1 = fadd float %b, 4.0
%select = select i1 %cond, float %add.0, float %add.1
%neg.select = fneg float %select
ret float %neg.select
}
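
; Note: the safe and nsz check blocks are currently identical; the
; FIXME applies to both, since the fneg is not folded into the select
; operands in either mode.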

; GCN-LABEL: {{^}}v_fneg_posk_select_f32:
; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
define amdgpu_kernel void @v_fneg_posk_select_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%cond = icmp eq i32 %tid, 0
%select = select i1 %cond, float 4.0, float %a
%fneg = fneg float %select
store float %fneg, ptr addrspace(1) %out.gep
ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_select_f32:
; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}, vcc
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
define amdgpu_kernel void @v_fneg_negk_select_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile float, ptr addrspace(1) %a.gep
%cond = icmp eq i32 %tid, 0
%select = select i1 %cond, float -4.0, float %a
%fneg = fneg float %select
store float %fneg, ptr addrspace(1) %out.gep
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare half @llvm.minnum.f16(half, half) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.fma.f64(double, double, double) #1

declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }