Files
clang-p2996/llvm/test/CodeGen/X86/fmaxnum.ll
Quentin Colombet 48abac82b8 Revert "[MachineCopyPropagation] Extend pass to do COPY source forwarding"
This reverts commit r323991.

This commit breaks target that don't model all the register constraints
in TableGen. So far the workaround was to set the
hasExtraXXXRegAllocReq, but it proves that it doesn't cover all the
cases.
For instance, when mutating an instruction (like in the lowering of
COPYs) the isRenamable flag is not properly updated. The same problem
will happen when attaching machine operand from one instruction to
another.

Geoff Berry is working on a fix in https://reviews.llvm.org/D43042.

llvm-svn: 325421
2018-02-17 03:05:33 +00:00

240 lines
8.0 KiB
LLVM

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
declare float @fmaxf(float, float)
declare double @fmax(double, double)
declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
; CHECK-LABEL: @test_fmaxf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define float @test_fmaxf(float %x, float %y) {
%z = call float @fmaxf(float %x, float %y) readnone
ret float %z
}
; CHECK-LABEL: @test_fmaxf_minsize
; CHECK: jmp fmaxf
define float @test_fmaxf_minsize(float %x, float %y) minsize {
%z = call float @fmaxf(float %x, float %y) readnone
ret float %z
}
; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
; CHECK-LABEL: @test_fmax
; SSE: movapd %xmm0, %xmm2
; SSE-NEXT: cmpunordsd %xmm2, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: maxsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define double @test_fmax(double %x, double %y) {
%z = call double @fmax(double %x, double %y) readnone
ret double %z
}
; CHECK-LABEL: @test_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
%z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
ret x86_fp80 %z
}
; CHECK-LABEL: @test_intrinsic_fmaxf
; SSE: movaps %xmm0, %xmm2
; SSE-NEXT: cmpunordss %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: andps %xmm1, %xmm3
; SSE-NEXT: maxss %xmm0, %xmm1
; SSE-NEXT: andnps %xmm1, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxss %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define float @test_intrinsic_fmaxf(float %x, float %y) {
%z = call float @llvm.maxnum.f32(float %x, float %y) readnone
ret float %z
}
; CHECK-LABEL: @test_intrinsic_fmax
; SSE: movapd %xmm0, %xmm2
; SSE-NEXT: cmpunordsd %xmm2, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm3
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: maxsd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm1, %xmm2
; SSE-NEXT: orpd %xmm3, %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxsd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define double @test_intrinsic_fmax(double %x, double %y) {
%z = call double @llvm.maxnum.f64(double %x, double %y) readnone
ret double %z
}
; CHECK-LABEL: @test_intrinsic_fmaxl
; CHECK: callq fmaxl
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
%z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
ret x86_fp80 %z
}
; CHECK-LABEL: @test_intrinsic_fmax_v2f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: maxps %xmm0, %xmm2
; SSE-NEXT: cmpunordps %xmm0, %xmm0
; SSE-NEXT: andps %xmm0, %xmm1
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
%z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
ret <2 x float> %z
}
; CHECK-LABEL: @test_intrinsic_fmax_v4f32
; SSE: movaps %xmm1, %xmm2
; SSE-NEXT: maxps %xmm0, %xmm2
; SSE-NEXT: cmpunordps %xmm0, %xmm0
; SSE-NEXT: andps %xmm0, %xmm1
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
%z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
ret <4 x float> %z
}
; CHECK-LABEL: @test_intrinsic_fmax_v2f64
; SSE: movapd %xmm1, %xmm2
; SSE-NEXT: maxpd %xmm0, %xmm2
; SSE-NEXT: cmpunordpd %xmm0, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm1
; SSE-NEXT: andnpd %xmm2, %xmm0
; SSE-NEXT: orpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX: vmaxpd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT: retq
define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
%z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
ret <2 x double> %z
}
; CHECK-LABEL: @test_intrinsic_fmax_v4f64
; SSE: movapd %xmm2, %xmm4
; SSE-NEXT: maxpd %xmm0, %xmm4
; SSE-NEXT: cmpunordpd %xmm0, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm2
; SSE-NEXT: andnpd %xmm4, %xmm0
; SSE-NEXT: orpd %xmm2, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm2
; SSE-NEXT: maxpd %xmm1, %xmm2
; SSE-NEXT: cmpunordpd %xmm1, %xmm1
; SSE-NEXT: andpd %xmm1, %xmm3
; SSE-NEXT: andnpd %xmm2, %xmm1
; SSE-NEXT: orpd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX: vmaxpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX-NEXT: retq
define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
%z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
ret <4 x double> %z
}
; CHECK-LABEL: @test_intrinsic_fmax_v8f64
; SSE: movapd %xmm4, %xmm8
; SSE-NEXT: maxpd %xmm0, %xmm8
; SSE-NEXT: cmpunordpd %xmm0, %xmm0
; SSE-NEXT: andpd %xmm0, %xmm4
; SSE-NEXT: andnpd %xmm8, %xmm0
; SSE-NEXT: orpd %xmm4, %xmm0
; SSE-NEXT: movapd %xmm5, %xmm4
; SSE-NEXT: maxpd %xmm1, %xmm4
; SSE-NEXT: cmpunordpd %xmm1, %xmm1
; SSE-NEXT: andpd %xmm1, %xmm5
; SSE-NEXT: andnpd %xmm4, %xmm1
; SSE-NEXT: orpd %xmm5, %xmm1
; SSE-NEXT: movapd %xmm6, %xmm4
; SSE-NEXT: maxpd %xmm2, %xmm4
; SSE-NEXT: cmpunordpd %xmm2, %xmm2
; SSE-NEXT: andpd %xmm2, %xmm6
; SSE-NEXT: andnpd %xmm4, %xmm2
; SSE-NEXT: orpd %xmm6, %xmm2
; SSE-NEXT: movapd %xmm7, %xmm4
; SSE-NEXT: maxpd %xmm3, %xmm4
; SSE-NEXT: cmpunordpd %xmm3, %xmm3
; SSE-NEXT: andpd %xmm3, %xmm7
; SSE-NEXT: andnpd %xmm4, %xmm3
; SSE-NEXT: orpd %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX: vmaxpd %ymm0, %ymm2, %ymm4
; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
; AVX-NEXT: vmaxpd %ymm1, %ymm3, %ymm2
; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1
; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
; AVX-NEXT: retq
define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
%z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
ret <8 x double> %z
}