Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
Alexey Bataev 7dca2c628c [SLP]Gather scalarized calls
If the calls won't be vectorized, but will be scalarized after
vectorization, they should be build as buildvector nodes, not vector
nodes. Vectorization of such calls leads to incorrect cost estimation,
does not allow to calculate correctly spills costs.

Reviewers: lukel97, preames

Reviewed By: preames

Pull Request: https://github.com/llvm/llvm-project/pull/125070
2025-02-04 19:09:57 -05:00

1739 lines
101 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN: | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: | FileCheck %s --check-prefix=DEFAULT
declare float @fabsf(float) readonly nounwind willreturn
define <4 x float> @fabs_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @fabs_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @fabs_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @fabsf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @fabsf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @fabsf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @fabsf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.fabs.f32(float)
define <4 x float> @int_fabs_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_fabs_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.fabs.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @sqrtf(float) readonly nounwind willreturn
define <4 x float> @sqrt_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sqrt_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @sqrt_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @sqrtf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @sqrtf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @sqrtf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @sqrtf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.sqrt.f32(float)
define <4 x float> @int_sqrt_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sqrt_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.sqrt.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @expf(float) readonly nounwind willreturn
; We can not vectorized exp since RISCV has no such instruction.
define <4 x float> @exp_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @exp_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @expf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @expf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @expf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @expf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.exp.f32(float)
; We can not vectorized exp since RISCV has no such instruction.
define <4 x float> @int_exp_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_exp_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.exp.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.exp.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.exp.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.exp.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @logf(float) readonly nounwind willreturn
; We can not vectorized log since RISCV has no such instruction.
define <4 x float> @log_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @log_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @logf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @logf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @logf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @logf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.log.f32(float)
; We can not vectorized log since RISCV has no such instruction.
define <4 x float> @int_log_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_log_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.log.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.log.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.log.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.log.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @sinf(float) readonly nounwind willreturn
; We can not vectorized sin since RISCV has no such instruction.
define <4 x float> @sin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @sin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @sinf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @sinf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @sinf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @sinf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.sin.f32(float)
; We can not vectorized sin since RISCV has no such instruction.
define <4 x float> @int_sin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_sin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.sin.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.sin.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.sin.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.sin.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @asinf(float) readonly nounwind willreturn
; We can not vectorized asin since RISCV has no such instruction.
define <4 x float> @asin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @asin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @asin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @asinf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @asinf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @asinf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @asinf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.asin.f32(float)
; We can not vectorized asin since RISCV has no such instruction.
define <4 x float> @int_asin_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_asin_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_asin_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.asin.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.asin.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.asin.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.asin.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @cosf(float) readonly nounwind willreturn
; We can not vectorized cos cosce RISCV has no such instruction.
define <4 x float> @cos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @cos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @cos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @cosf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @cosf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @cosf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @cosf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.cos.f32(float)
; We can not vectorized cos cosce RISCV has no such instruction.
define <4 x float> @int_cos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_cos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_cos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.cos.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.cos.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.cos.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.cos.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @acosf(float) readonly nounwind willreturn
; We can not vectorized acos cosce RISCV has no such instruction.
define <4 x float> @acos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @acos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @acos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @acosf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @acosf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @acosf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @acosf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.acos.f32(float)
; We can not vectorized acos cosce RISCV has no such instruction.
define <4 x float> @int_acos_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_acos_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_acos_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.acos.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.acos.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.acos.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.acos.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @tanf(float) readonly nounwind willreturn
; We can not vectorized tan tance RISCV has no such instruction.
define <4 x float> @tan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @tan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @tan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @tanf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @tanf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @tanf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @tanf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.tan.f32(float)
; We can not vectorized tan tance RISCV has no such instruction.
define <4 x float> @int_tan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_tan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_tan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.tan.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.tan.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.tan.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.tan.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @atanf(float) readonly nounwind willreturn
; We can not vectorized atan tance RISCV has no such instruction.
define <4 x float> @atan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @atanf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @atanf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @atanf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @atanf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.atan.f32(float)
; We can not vectorized atan tance RISCV has no such instruction.
define <4 x float> @int_atan_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atan_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atan_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.atan.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.atan.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.atan.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.atan.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @sinhf(float) readonly nounwind willreturn
; We can not vectorized sinh since RISCV has no such instruction.
define <4 x float> @sinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @sinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @sinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @sinhf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @sinhf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @sinhf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @sinhf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.sinh.f32(float)
; We can not vectorized sinh since RISCV has no such instruction.
define <4 x float> @int_sinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_sinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.sinh.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @asinhf(float) readonly nounwind willreturn
; We can not vectorized asinh since RISCV has no such instruction.
define <4 x float> @asinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @asinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @asinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @asinhf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @asinhf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @asinhf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @asinhf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.asinh.f32(float)
; We can not vectorized asinh since RISCV has no such instruction.
define <4 x float> @int_asinh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_asinh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.asinh.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @coshf(float) readonly nounwind willreturn
; We can not vectorized cosh since RISCV has no such instruction.
define <4 x float> @cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @coshf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @coshf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @coshf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @coshf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.cosh.f32(float)
; We can not vectorized cosh since RISCV has no such instruction.
define <4 x float> @int_cosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_cosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.cosh.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @acoshf(float) readonly nounwind willreturn
; We can not vectorized acosh since RISCV has no such instruction.
define <4 x float> @acosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @acosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @acosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @acoshf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @acoshf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @acoshf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @acoshf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.acosh.f32(float)
; We can not vectorized acosh since RISCV has no such instruction.
define <4 x float> @int_acosh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_acosh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.acosh.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @tanhf(float) readonly nounwind willreturn
; We can not vectorized tanh since RISCV has no such instruction.
define <4 x float> @tanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @tanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @tanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @tanhf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @tanhf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @tanhf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @tanhf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.tanh.f32(float)
; We can not vectorized tanh since RISCV has no such instruction.
define <4 x float> @int_tanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_tanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.tanh.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @atanhf(float) readonly nounwind willreturn
; We can not vectorized atanh since RISCV has no such instruction.
define <4 x float> @atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @atanhf(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @atanhf(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @atanhf(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @atanhf(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}
declare float @llvm.atanh.f32(float)
; We can not vectorized atanh since RISCV has no such instruction.
define <4 x float> @int_atanh_4x(ptr %a) {
; CHECK-LABEL: define <4 x float> @int_atanh_4x
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
;
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
%vecext = extractelement <4 x float> %0, i32 0
%1 = tail call fast float @llvm.atanh.f32(float %vecext)
%vecins = insertelement <4 x float> poison, float %1, i32 0
%vecext.1 = extractelement <4 x float> %0, i32 1
%2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
%vecext.2 = extractelement <4 x float> %0, i32 2
%3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
%vecext.3 = extractelement <4 x float> %0, i32 3
%4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
ret <4 x float> %vecins.3
}