Improve and enable folding of conditional branches with tail calls.

1. Make it so that conditional tail calls can be emitted even when there
   are multiple predecessors.
2. Don't guard the transformation behind -Os. The rationale for guarding
   it was that static prediction can be affected by whether the branch is
   forward or backward. This is no longer true for almost any x86 CPU
   (anything newer than `SnB`), so it is no longer a meaningful concern.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D140931
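In effect, the fold replaces a conditional branch whose target block only tail-calls with a single conditional jump to the callee. A rough before/after sketch for the `@f` case in the test below; the "without" sequence and its block layout are illustrative, not output captured from a pre-patch compiler:

    # Without the fold (illustrative; block layout is hypothetical):
            ucomiss %xmm1, %xmm0
            jb      .LBB0_2
    # %bb.1:                        # %.split
            sqrtss  %xmm0, %xmm0
            retq
    .LBB0_2:
            jmp     sqrtf           # TAILCALL

    # With the fold (what the updated checks below expect):
            ucomiss %xmm1, %xmm0
            jb      sqrtf           # TAILCALL
    # %bb.1:                        # %.split
            sqrtss  %xmm0, %xmm0
            retq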
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; PR31455 - https://bugs.llvm.org/show_bug.cgi?id=31455
; We have to assume that errno can be set, so we have to make a libcall in that case.
; But it's better for perf to check that the argument is valid rather than the result of
; sqrtss/sqrtsd.
; Note: This is really a test of the -partially-inline-libcalls IR pass (and we have an IR test
; for that), but we're checking the final asm to make sure that comes out as expected too.
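; A rough sketch of the partially inlined IR shape that yields the asm below
; (value/block names and the exact compare predicate are illustrative; see the
; IR test for -partially-inline-libcalls for the pass's real output):
;
;   %maybe_errno = fcmp ult float %val, 0.000000e+00
;   br i1 %maybe_errno, label %libcall, label %.split
; libcall:                                    ; input may be invalid: make the
;   %slow = tail call float @sqrtf(float %val)  ; real call so errno can be set
;   ret float %slow
; .split:                                     ; input is valid: sqrtss is enough
;   %fast = call fast float @llvm.sqrt.f32(float %val)
;   ret float %fast
;
; The "jb sqrtf # TAILCALL" checks below are the branch-folded form of that
; libcall path.
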
define float @f(float %val) nounwind {
; SSE-LABEL: f:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    jb sqrtf # TAILCALL
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    jb sqrtf # TAILCALL
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = tail call float @sqrtf(float %val)
  ret float %res
}

define double @d(double %val) nounwind {
; SSE-LABEL: d:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    jb sqrt # TAILCALL
; SSE-NEXT:  # %bb.1: # %.split
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: d:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    jb sqrt # TAILCALL
; AVX-NEXT:  # %bb.1: # %.split
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = tail call double @sqrt(double %val)
  ret double %res
}

define double @minsize(double %x, double %y) minsize {
; SSE-LABEL: minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    mulsd %xmm0, %xmm0
; SSE-NEXT:    mulsd %xmm1, %xmm1
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vmulsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t3 = fmul fast double %x, %x
  %t4 = fmul fast double %y, %y
  %t5 = fadd fast double %t3, %t4
  %t6 = tail call fast double @llvm.sqrt.f64(double %t5)
  ret double %t6
}

; Partial reg avoidance may involve register allocation
; rather than adding an instruction.

define double @partial_dep_minsize(double %x, double %y) minsize {
; SSE-LABEL: partial_dep_minsize:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: partial_dep_minsize:
; AVX:       # %bb.0:
; AVX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm0
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t6 = tail call fast double @llvm.sqrt.f64(double %y)
  %t = fadd fast double %t6, %y
  ret double %t
}

declare dso_local float @sqrtf(float)
declare dso_local double @sqrt(double)
declare dso_local double @llvm.sqrt.f64(double)