This PR is intended to address the limited SLPVectorizer support of tan raised in the comments of this PR: https://github.com/llvm/llvm-project/pull/94559. Right now emitting the tan intrinsisic allows you to vectorize tan, but emitting the libfunc does not. to address this the libcall needs to be mapped to the intrinsic. and the libcall and function name need to be marked approriately so they can be optimized or defined as a call lowering.
182 lines
7.7 KiB
LLVM
182 lines
7.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Exercises SLP vectorization of pairs of scalar math-library calls. The
; pipeline below runs slp-vectorizer followed by dce on an x86_64 macOS target
; with -mcpu=corei7-avx. The very negative -slp-threshold value is presumably
; there so that the cost model never rejects a candidate (confirm against the
; SLPVectorizer option documentation).
; RUN: opt < %s -passes=slp-vectorizer,dce -slp-threshold=-999 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Math-library functions called by the tests in this file. Every declaration
; carries nounwind and willreturn; memory effects are specified per call site.
declare double @sin(double) nounwind willreturn
declare double @cos(double) nounwind willreturn
declare double @tan(double) nounwind willreturn
declare double @pow(double, double) nounwind willreturn
declare double @exp2(double) nounwind willreturn
declare double @sqrt(double) nounwind willreturn
; This @round takes and returns i64 rather than double; it is called by the
; negative test @round_custom.
declare i64 @round(i64) nounwind willreturn
|
|
|
|
|
|
; Two scalar calls to @sin on the adjacent doubles a[0] and a[1], each marked
; nounwind readnone at the call site, with results stored to b[0] and b[1].
; The assertions expect one <2 x double> load, a single call to
; @llvm.sin.v2f64, and one <2 x double> store.
define void @sin_libm(ptr %a, ptr %b) {
; CHECK-LABEL: @sin_libm(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %sin1 = tail call double @sin(double %a0) nounwind readnone
  %sin2 = tail call double @sin(double %a1) nounwind readnone
  store double %sin1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %sin2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; Two scalar calls to @cos on the adjacent doubles a[0] and a[1], each marked
; nounwind readnone at the call site, with results stored to b[0] and b[1].
; The assertions expect one <2 x double> load, a single call to
; @llvm.cos.v2f64, and one <2 x double> store.
define void @cos_libm(ptr %a, ptr %b) {
; CHECK-LABEL: @cos_libm(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %cos1 = tail call double @cos(double %a0) nounwind readnone
  %cos2 = tail call double @cos(double %a1) nounwind readnone
  store double %cos1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %cos2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; Two scalar calls to @tan on the adjacent doubles a[0] and a[1], each marked
; nounwind readnone at the call site, with results stored to b[0] and b[1].
; The assertions expect one <2 x double> load, a single call to
; @llvm.tan.v2f64, and one <2 x double> store, i.e. the tan library call is
; treated like the sin/cos cases in this file.
define void @tan_libm(ptr %a, ptr %b) {
; CHECK-LABEL: @tan_libm(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %tan1 = tail call double @tan(double %a0) nounwind readnone
  %tan2 = tail call double @tan(double %a1) nounwind readnone
  store double %tan1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %tan2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; Two scalar calls to the two-operand @pow on the adjacent doubles a[0] and
; a[1], each marked nounwind readnone at the call site. Each call passes the
; same loaded value as both operands, so the expected vector call to
; @llvm.pow.v2f64 uses the single loaded <2 x double> for both arguments.
define void @pow_libm(ptr %a, ptr %b) {
; CHECK-LABEL: @pow_libm(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
  %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
  store double %pow1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %pow2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; Despite the function name, the callee here is @exp2: two scalar calls on the
; adjacent doubles a[0] and a[1], each marked nounwind readnone at the call
; site. The assertions expect one <2 x double> load, a single call to
; @llvm.exp2.v2f64, and one <2 x double> store.
; The local value %exp2 and the global @exp2 are different identifiers (local
; '%' versus global '@' prefix), so they do not clash.
define void @exp_libm(ptr %a, ptr %b) {
; CHECK-LABEL: @exp_libm(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %exp1 = tail call double @exp2(double %a0) nounwind readnone
  %exp2 = tail call double @exp2(double %a1) nounwind readnone
  store double %exp1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %exp2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
; We just need to know that errno is not set (readnone).
;
; Both @sqrt calls below carry nounwind readnone and no fast-math flags. The
; assertions expect one <2 x double> load, a single call to @llvm.sqrt.v2f64,
; and one <2 x double> store.
define void @sqrt_libm_no_errno(ptr %a, ptr %b) {
; CHECK-LABEL: @sqrt_libm_no_errno(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
  %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
  store double %sqrt1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %sqrt2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize.
; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
; input would result in a nan output ("On a domain error, the function returns an
; implementation-defined value.")
;
; Unlike @sqrt_libm_no_errno, the call sites here carry only nounwind (no
; readnone), plus the nnan flag. The assertions expect the scalar loads, both
; scalar @sqrt calls, and the scalar stores to remain in place.
define void @sqrt_libm_errno(ptr %a, ptr %b) {
; CHECK-LABEL: @sqrt_libm_errno(
; CHECK-NEXT:    [[A0:%.*]] = load double, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = load double, ptr [[IDX1]], align 8
; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR3]]
; CHECK-NEXT:    store double [[SQRT1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 1
; CHECK-NEXT:    store double [[SQRT2]], ptr [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, ptr %a, align 8
  %idx1 = getelementptr inbounds double, ptr %a, i64 1
  %a1 = load double, ptr %idx1, align 8
  %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
  %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
  store double %sqrt1, ptr %b, align 8
  %idx2 = getelementptr inbounds double, ptr %b, i64 1
  store double %sqrt2, ptr %idx2, align 8
  ret void
}
|
|
|
|
; Negative test case
;
; @round is declared in this file as i64 (i64), and the calls below operate on
; i64 values loaded from %a, each marked nounwind readnone. The assertions
; expect the scalar loads, both scalar @round calls, and the scalar stores to
; remain in place, i.e. no vector intrinsic call is formed.
define void @round_custom(ptr %a, ptr %b) {
; CHECK-LABEL: @round_custom(
; CHECK-NEXT:    [[A0:%.*]] = load i64, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = load i64, ptr [[IDX1]], align 8
; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR4]]
; CHECK-NEXT:    store i64 [[ROUND1]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
; CHECK-NEXT:    store i64 [[ROUND2]], ptr [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load i64, ptr %a, align 8
  %idx1 = getelementptr inbounds i64, ptr %a, i64 1
  %a1 = load i64, ptr %idx1, align 8
  %round1 = tail call i64 @round(i64 %a0) nounwind readnone
  %round2 = tail call i64 @round(i64 %a1) nounwind readnone
  store i64 %round1, ptr %b, align 8
  %idx2 = getelementptr inbounds i64, ptr %b, i64 1
  store i64 %round2, ptr %idx2, align 8
  ret void
}
|
|
|
|
|
|
|
|
|