Files
clang-p2996/llvm/test/CodeGen/AMDGPU/fneg.ll
Jay Foad f7684d8510 [DAG] Use legal shift amount type in DAGTypeLegalizer::JoinIntegers
Documentation for TargetLowering::getShiftAmountTy says that LegalTypes
should generally be true during type legalization, so this patch does
that.

On AMDGPU the effect is that we use i32 (a sane type) instead of i64
(pointer sized type) for more shift amounts, which in turn allows more
formation of rotates and funnel shifts pre-legalization.

Differential Revision: https://reviews.llvm.org/D154960
2023-07-12 08:12:09 +01:00

299 lines
9.6 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=FUNC,GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=FUNC,GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=FUNC,GCN,GFX11 %s
; RUN: not llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=FUNC,R600 %s
; FUNC-LABEL: {{^}}s_fneg_f32:
; R600: -PV
; GCN: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GCN: s_xor_b32 [[NEG_VAL:s[0-9]+]], [[VAL]], 0x80000000
; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[NEG_VAL]]
; Negates a scalar float kernel argument, written as (-0.0 - %in), and stores
; the result to %out.
define amdgpu_kernel void @s_fneg_f32(ptr addrspace(1) %out, float %in) {
  %neg = fsub float -0.000000e+00, %in
  store float %neg, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}s_fneg_v2f32:
; R600: -PV
; R600: -PV
; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
; Two-element variant: subtracts the <2 x float> argument from a splat of -0.0
; and stores the resulting vector to %out.
define amdgpu_kernel void @s_fneg_v2f32(ptr addrspace(1) nocapture %out, <2 x float> %in) {
  %neg.vec = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
  store <2 x float> %neg.vec, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}s_fneg_v4f32:
; R600: -PV
; R600: -T
; R600: -PV
; R600: -PV
; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
; Four-element variant: subtracts the <4 x float> argument from a splat of -0.0
; and stores the resulting vector to %out.
define amdgpu_kernel void @s_fneg_v4f32(ptr addrspace(1) nocapture %out, <4 x float> %in) {
  %neg.vec = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
  store <4 x float> %neg.vec, ptr addrspace(1) %out
  ret void
}
; DAGCombiner will transform:
; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
; unless the target returns true for isNegFree()
; FUNC-LABEL: {{^}}fsub0_f32:
; GCN: v_sub_f32_e64 v{{[0-9]+}}, 0, s{{[0-9]+$}}
; R600-NOT: XOR
; R600: -KC0[2].Z
; Subtracting from +0.0 is not a negation (+0.0 - +0.0 is +0.0, not -0.0), so
; the GCN check above expects a real v_sub_f32 and the R600 checks forbid XOR.
; The destination register pattern accepts any VGPR number, matching the other
; checks in this file, so the test does not depend on a single-digit register.
define amdgpu_kernel void @fsub0_f32(ptr addrspace(1) %out, i32 %in) {
  ; Reinterpret the integer kernel argument as a float.
  %as.float = bitcast i32 %in to float
  %diff = fsub float 0.0, %as.float
  store float %diff, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}fneg_free_f32:
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GFX11: s_load_b32 [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN: s_xor_b32 [[RES:s[0-9]+]], [[NEG_VALUE]], 0x80000000
; GCN: v_mov_b32_e32 [[V_RES:v[0-9]+]], [[RES]]
; GCN: buffer_store_{{dword|b32}} [[V_RES]]
; R600-NOT: XOR
; R600: -PV.W
; The i32 argument is reinterpreted as float and negated via (-0.0 - x). The
; GCN checks above expect the negation to be emitted as an s_xor_b32 of the
; loaded value with 0x80000000.
define amdgpu_kernel void @fneg_free_f32(ptr addrspace(1) %out, i32 %in) {
  %as.float = bitcast i32 %in to float
  %neg = fsub float -0.0, %as.float
  store float %neg, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}fneg_fold_f32:
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GFX11: s_load_{{dword|b32}} [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; GCN-NOT: xor
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
; Multiplies the negated argument by the argument itself. The GCN checks above
; expect no xor instruction and a v_mul_f32 whose first source is the negated
; loaded value.
define amdgpu_kernel void @fneg_fold_f32(ptr addrspace(1) %out, float %in) {
  %neg = fsub float -0.0, %in
  %prod = fmul float %neg, %in
  store float %prod, ptr addrspace(1) %out
  ret void
}
; Make sure we turn some integer operations back into fneg
; FUNC-LABEL: {{^}}bitpreserve_fneg_f32:
; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0
; Flips the sign bit with integer operations (bitcast to i32, xor with
; 2147483648 = 0x80000000, bitcast back) and multiplies the result by 4.0. The
; check above expects a single v_mul_f32 by -4.0.
define amdgpu_kernel void @bitpreserve_fneg_f32(ptr addrspace(1) %out, float %in) {
  %in.int = bitcast float %in to i32
  %int.neg = xor i32 %in.int, 2147483648
  %neg.float = bitcast i32 %int.neg to float
  %fmul = fmul float %neg.float, 4.0
  store float %fmul, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}s_fneg_i32:
; GCN: s_load_{{dword|b32}} [[IN:s[0-9]+]]
; GCN: s_xor_b32 [[FNEG:s[0-9]+]], [[IN]], 0x80000000
; GCN: v_mov_b32_e32 [[V_FNEG:v[0-9]+]], [[FNEG]]
; Integer-only sign-bit flip of a kernel argument: the xor result is stored as
; an i32 and never used as a float.
define amdgpu_kernel void @s_fneg_i32(ptr addrspace(1) %out, i32 %in) {
  %flipped = xor i32 %in, -2147483648
  store i32 %flipped, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_i32:
; GCN: s_waitcnt
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64
; Non-kernel variant of the integer sign-bit flip; the flipped i32 is returned
; directly.
define i32 @v_fneg_i32(i32 %in) {
  %flipped = xor i32 %in, -2147483648
  ret i32 %flipped
}
; FUNC-LABEL: {{^}}s_fneg_i32_fp_use:
; GCN: s_load_{{dword|b32}} [[IN:s[0-9]+]]
; GCN: v_sub_f32_e64 v{{[0-9]+}}, 2.0, [[IN]]
; The sign-bit xor feeds a floating-point add after a bitcast. The GCN checks
; above expect a v_sub_f32 of the loaded value from 2.0.
define amdgpu_kernel void @s_fneg_i32_fp_use(ptr addrspace(1) %out, i32 %in) {
  %flipped = xor i32 %in, -2147483648
  %as.float = bitcast i32 %flipped to float
  %sum = fadd float %as.float, 2.0
  store float %sum, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_i32_fp_use:
; GCN: s_waitcnt
; GCN-NEXT: v_sub_f32_e32 v0, 2.0, v0
; GCN-NEXT: s_setpc_b64
; Non-kernel variant: sign-bit xor, bitcast to float, then add 2.0 and return.
; The GCN checks above expect one v_sub_f32 (2.0 - v0) between the waitcnt and
; the return.
define float @v_fneg_i32_fp_use(i32 %in) {
  %flipped = xor i32 %in, -2147483648
  %as.float = bitcast i32 %flipped to float
  %sum = fadd float %as.float, 2.0
  ret float %sum
}
; FUNC-LABEL: {{^}}s_fneg_i64:
; GCN: s_xor_b32 s[[NEG_HI:[0-9]+]], s{{[0-9]+}}, 0x80000000
; 64-bit integer sign-bit flip (xor with the i64 minimum value). The check
; above expects a 32-bit s_xor_b32 with 0x80000000, captured as NEG_HI.
define amdgpu_kernel void @s_fneg_i64(ptr addrspace(1) %out, i64 %in) {
  %flipped = xor i64 %in, -9223372036854775808
  store i64 %flipped, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_i64:
; GCN: s_waitcnt
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64
; Non-kernel 64-bit sign-bit flip. The GCN checks above expect a single 32-bit
; xor applied to v1 only.
define i64 @v_fneg_i64(i64 %in) {
  %flipped = xor i64 %in, -9223372036854775808
  ret i64 %flipped
}
; FUNC-LABEL: {{^}}s_fneg_i64_fp_use:
; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, -s{{\[[0-9]+:[0-9]+\]}}, 2.0
; The 64-bit sign-bit xor feeds a double-precision add after a bitcast. The
; check above expects a v_add_f64 with a negated scalar register pair and 2.0.
define amdgpu_kernel void @s_fneg_i64_fp_use(ptr addrspace(1) %out, i64 %in) {
  %flipped = xor i64 %in, -9223372036854775808
  %as.double = bitcast i64 %flipped to double
  %sum = fadd double %as.double, 2.0
  store double %sum, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_i64_fp_use:
; GCN: s_waitcnt
; GCN-NEXT: v_add_f64 v[0:1], -v[0:1], 2.0
; GCN-NEXT: s_setpc_b64
; Non-kernel variant: 64-bit sign-bit xor, bitcast to double, add 2.0, return.
; The GCN checks above expect one v_add_f64 with a negated v[0:1] source.
define double @v_fneg_i64_fp_use(i64 %in) {
  %flipped = xor i64 %in, -9223372036854775808
  %as.double = bitcast i64 %flipped to double
  %sum = fadd double %as.double, 2.0
  ret double %sum
}
; FUNC-LABEL: {{^}}v_fneg_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
; GCN-NEXT: s_setpc_b64
; 16-bit integer sign-bit flip (xor with -32768, i.e. 0x8000 as i16). The GCN
; checks above expect a 32-bit xor with 0xffff8000.
define i16 @v_fneg_i16(i16 %in) {
  %flipped = xor i16 %in, -32768
  ret i16 %flipped
}
; FUNC-LABEL: {{^}}s_fneg_i16_fp_use:
; SI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], s{{[0-9]+}}
; SI: v_sub_f32_e32 [[ADD:v[0-9]+]], 2.0, [[CVT0]]
; SI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[ADD]]
; VI: s_load_dword [[IN:s[0-9]+]]
; VI: v_sub_f16_e64 v{{[0-9]+}}, 2.0, [[IN]]
; The 16-bit sign-bit xor feeds a half-precision add after a bitcast. The SI
; checks above expect conversion to f32, a v_sub_f32 from 2.0, and conversion
; back; the VI checks expect a v_sub_f16 of the loaded value from 2.0.
define amdgpu_kernel void @s_fneg_i16_fp_use(ptr addrspace(1) %out, i16 %in) {
  %flipped = xor i16 %in, -32768
  %as.half = bitcast i16 %flipped to half
  %sum = fadd half %as.half, 2.0
  store half %sum, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_i16_fp_use:
; SI: s_waitcnt
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_sub_f32_e32 v0, 2.0, v0
; SI-NEXT: s_setpc_b64
; VI: s_waitcnt
; VI-NEXT: v_sub_f16_e32 v0, 2.0, v0
; VI-NEXT: s_setpc_b64
; Non-kernel variant: 16-bit sign-bit xor, bitcast to half, add 2.0, return.
; The SI checks above expect a convert followed by v_sub_f32; the VI checks
; expect a single v_sub_f16.
define half @v_fneg_i16_fp_use(i16 %in) {
  %flipped = xor i16 %in, -32768
  %as.half = bitcast i16 %flipped to half
  %sum = fadd half %as.half, 2.0
  ret half %sum
}
; FUNC-LABEL: {{^}}s_fneg_v2i16:
; SI: s_xor_b32 s4, s4, 0x80008000
; VI: s_lshr_b32 s5, s4, 16
; VI: s_xor_b32 s4, s4, 0x8000
; VI: s_xor_b32 s5, s5, 0x8000
; VI: s_and_b32 s4, s4, 0xffff
; VI: s_lshl_b32 s5, s5, 16
; VI: s_or_b32 s4, s4, s5
; Flips the sign bit of both i16 halves of an i32 kernel argument. The SI check
; above expects one s_xor_b32 with 0x80008000; the VI checks expect the halves
; to be split, xor'ed with 0x8000 separately, and recombined.
define amdgpu_kernel void @s_fneg_v2i16(ptr addrspace(1) %out, i32 %arg) {
  %halves = bitcast i32 %arg to <2 x i16>
  %flipped = xor <2 x i16> %halves, <i16 -32768, i16 -32768>
  store <2 x i16> %flipped, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_v2i16:
; SI: v_xor_b32_e32 v1, 0x8000, v1
; SI: v_xor_b32_e32 v0, 0x8000, v0
; SI: v_lshlrev_b32_e32 v2, 16, v1
; SI: v_and_b32_e32 v0, 0xffff, v0
; SI: v_or_b32_e32 v0, v0, v2
; SI: v_and_b32_e32 v1, 0xffff, v1
; VI: s_waitcnt
; VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; VI-NEXT: s_setpc_b64
; Non-kernel <2 x i16> sign-bit flip. The SI checks above expect each element
; to be xor'ed with 0x8000 in its own register; the VI checks expect a single
; xor of v0 with 0x80008000.
define <2 x i16> @v_fneg_v2i16(<2 x i16> %in) {
  %flipped = xor <2 x i16> %in, <i16 -32768, i16 -32768>
  ret <2 x i16> %flipped
}
; FUNC-LABEL: {{^}}s_fneg_v2i16_fp_use:
; SI: s_lshr_b32 s3, s2, 16
; SI: v_cvt_f32_f16_e32 v0, s3
; SI: v_cvt_f32_f16_e32 v1, s2
; SI: v_sub_f32_e32 v0, 2.0, v0
; SI: v_sub_f32_e32 v1, 2.0, v1
; VI: s_lshr_b32 s5, s4, 16
; VI: s_xor_b32 s5, s5, 0x8000
; VI: s_xor_b32 s4, s4, 0x8000
; VI: v_mov_b32_e32 v0, s5
; VI: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI: v_add_f16_e64 v1, s4, 2.0
; VI: v_or_b32_e32 v0, v1, v0
; The packed 16-bit sign-bit xor feeds a <2 x half> add of 2.0. The SI checks
; above expect both halves converted to f32 and subtracted from 2.0; the VI
; checks still expect an explicit s_xor_b32 with 0x8000 per half, followed by
; f16 adds.
define amdgpu_kernel void @s_fneg_v2i16_fp_use(ptr addrspace(1) %out, i32 %arg) {
  %halves = bitcast i32 %arg to <2 x i16>
  %flipped = xor <2 x i16> %halves, <i16 -32768, i16 -32768>
  %as.half = bitcast <2 x i16> %flipped to <2 x half>
  %sum = fadd <2 x half> %as.half, <half 2.0, half 2.0>
  store <2 x half> %sum, ptr addrspace(1) %out
  ret void
}
; FUNC-LABEL: {{^}}v_fneg_v2i16_fp_use:
; SI: v_lshrrev_b32_e32 v1, 16, v0
; SI: v_cvt_f32_f16_e32 v0, v0
; SI: v_cvt_f32_f16_e32 v1, v1
; SI: v_sub_f32_e32 v0, 2.0, v0
; SI: v_sub_f32_e32 v1, 2.0, v1
; VI: s_waitcnt
; VI: v_mov_b32_e32 v1, 0x4000
; VI: v_sub_f16_sdwa v1, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI: v_sub_f16_e32 v0, 2.0, v0
; VI: v_or_b32_e32 v0, v0, v1
; VI: s_setpc_b64
; Non-kernel variant of the packed case: sign-bit xor on both i16 halves,
; bitcast to <2 x half>, add 2.0 to each element, return. The SI checks above
; expect f32 subtracts from 2.0 after conversion; the VI checks expect f16
; subtracts (one in SDWA form) combined with an or.
define <2 x half> @v_fneg_v2i16_fp_use(i32 %arg) {
  %halves = bitcast i32 %arg to <2 x i16>
  %flipped = xor <2 x i16> %halves, <i16 -32768, i16 -32768>
  %as.half = bitcast <2 x i16> %flipped to <2 x half>
  %sum = fadd <2 x half> %as.half, <half 2.0, half 2.0>
  ret <2 x half> %sum
}