If the calls won't be vectorized, but will be scalarized after vectorization, they should be built as buildvector nodes, not vector nodes. Vectorizing such calls leads to incorrect cost estimation and does not allow spill costs to be calculated correctly. Reviewers: lukel97, preames Reviewed By: preames Pull Request: https://github.com/llvm/llvm-project/pull/125070
1739 lines
101 KiB
LLVM
1739 lines
101 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
|
|
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
|
|
; RUN: | FileCheck %s
|
|
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
|
|
; RUN: | FileCheck %s --check-prefix=DEFAULT
|
|
|
|
declare float @fabsf(float) readonly nounwind willreturn
|
|
|
|
; Test: four scalar calls to @fabsf, one per lane of the <4 x float> loaded
; from %a, with the results reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect a single
; @llvm.fabs.v4f32 call on the loaded vector.
define <4 x float> @fabs_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @fabs_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @fabs_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; Input IR: extract each lane, call @fabsf on it, and insert the result back
; at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @fabsf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @fabsf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @fabsf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @fabsf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.fabs.f32(float)
|
|
|
|
; Test: four scalar calls to the @llvm.fabs.f32 intrinsic, one per lane of the
; <4 x float> loaded from %a, reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect a single
; @llvm.fabs.v4f32 call on the loaded vector.
define <4 x float> @int_fabs_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_fabs_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; Input IR: extract each lane, call @llvm.fabs.f32 on it, and insert the
; result back at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.fabs.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @sqrtf(float) readonly nounwind willreturn
|
|
|
|
; Test: four scalar calls to @sqrtf, one per lane of the <4 x float> loaded
; from %a, with the results reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect a single
; @llvm.sqrt.v4f32 call on the loaded vector.
define <4 x float> @sqrt_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @sqrt_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @sqrt_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; Input IR: extract each lane, call @sqrtf on it, and insert the result back
; at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @sqrtf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @sqrtf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @sqrtf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @sqrtf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.sqrt.f32(float)
|
|
|
|
; Test: four scalar calls to the @llvm.sqrt.f32 intrinsic, one per lane of the
; <4 x float> loaded from %a, reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect a single
; @llvm.sqrt.v4f32 call on the loaded vector.
define <4 x float> @int_sqrt_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_sqrt_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; Input IR: extract each lane, call @llvm.sqrt.f32 on it, and insert the
; result back at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.sqrt.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @expf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize exp since RISC-V has no such instruction.
|
|
; Test: four scalar calls to @expf, one per lane of the <4 x float> loaded
; from %a, with the results reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar @expf
; calls and the extractelement/insertelement chain to remain (no vector call).
define <4 x float> @exp_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @exp_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @exp_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @expf on it, and insert the result back
; at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @expf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @expf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @expf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @expf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.exp.f32(float)
|
|
|
|
; We cannot vectorize exp since RISC-V has no such instruction.
|
|
; Test: four scalar calls to the @llvm.exp.f32 intrinsic, one per lane of the
; <4 x float> loaded from %a, reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar
; @llvm.exp.f32 calls and the extractelement/insertelement chain to remain
; (no vector call).
define <4 x float> @int_exp_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_exp_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_exp_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @llvm.exp.f32 on it, and insert the
; result back at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.exp.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.exp.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.exp.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.exp.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @logf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize log since RISC-V has no such instruction.
|
|
; Test: four scalar calls to @logf, one per lane of the <4 x float> loaded
; from %a, with the results reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar @logf
; calls and the extractelement/insertelement chain to remain (no vector call).
define <4 x float> @log_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @log_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @log_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @logf on it, and insert the result back
; at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @logf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @logf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @logf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @logf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.log.f32(float)
|
|
|
|
; We cannot vectorize log since RISC-V has no such instruction.
|
|
; Test: four scalar calls to the @llvm.log.f32 intrinsic, one per lane of the
; <4 x float> loaded from %a, reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar
; @llvm.log.f32 calls and the extractelement/insertelement chain to remain
; (no vector call).
define <4 x float> @int_log_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_log_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_log_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @llvm.log.f32 on it, and insert the
; result back at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.log.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.log.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.log.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.log.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @sinf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize sin since RISC-V has no such instruction.
|
|
; Test: four scalar calls to @sinf, one per lane of the <4 x float> loaded
; from %a, with the results reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar @sinf
; calls and the extractelement/insertelement chain to remain (no vector call).
define <4 x float> @sin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @sin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @sin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @sinf on it, and insert the result back
; at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @sinf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @sinf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @sinf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @sinf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.sin.f32(float)
|
|
|
|
; We cannot vectorize sin since RISC-V has no such instruction.
|
|
; Test: four scalar calls to the @llvm.sin.f32 intrinsic, one per lane of the
; <4 x float> loaded from %a, reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar
; @llvm.sin.f32 calls and the extractelement/insertelement chain to remain
; (no vector call).
define <4 x float> @int_sin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_sin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_sin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @llvm.sin.f32 on it, and insert the
; result back at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.sin.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.sin.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.sin.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.sin.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @asinf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize asin since RISC-V has no such instruction.
|
|
; Test: four scalar calls to @asinf, one per lane of the <4 x float> loaded
; from %a, with the results reassembled by an insertelement chain.
; Both the CHECK and DEFAULT assertions below expect the four scalar @asinf
; calls and the extractelement/insertelement chain to remain (no vector call).
define <4 x float> @asin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @asin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @asin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: extract each lane, call @asinf on it, and insert the result back
; at the same lane index.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @asinf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @asinf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @asinf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @asinf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.asin.f32(float)
|
|
|
|
; int_asin_4x: four scalar @llvm.asin.f32 calls, one per lane of a single
; <4 x float> load, reassembled with insertelement.
; We cannot vectorize asin since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @int_asin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_asin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_asin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.asin.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.asin.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.asin.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.asin.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @cosf(float) readonly nounwind willreturn
|
|
|
|
; cos_4x: four scalar calls to @cosf, one per lane of a single <4 x float>
; load, reassembled with insertelement.
; We cannot vectorize cos since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @cos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @cos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @cos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @cosf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @cosf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @cosf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @cosf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.cos.f32(float)
|
|
|
|
; int_cos_4x: four scalar @llvm.cos.f32 calls, one per lane of a single
; <4 x float> load, reassembled with insertelement.
; We cannot vectorize cos since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @int_cos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_cos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_cos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.cos.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.cos.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.cos.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.cos.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @acosf(float) readonly nounwind willreturn
|
|
|
|
; acos_4x: four scalar calls to @acosf, one per lane of a single <4 x float>
; load, reassembled with insertelement.
; We cannot vectorize acos since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @acos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @acos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @acos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @acosf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @acosf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @acosf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @acosf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.acos.f32(float)
|
|
|
|
; int_acos_4x: four scalar @llvm.acos.f32 calls, one per lane of a single
; <4 x float> load, reassembled with insertelement.
; We cannot vectorize acos since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @int_acos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_acos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_acos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.acos.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.acos.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.acos.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.acos.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @tanf(float) readonly nounwind willreturn
|
|
|
|
; tan_4x: four scalar calls to @tanf, one per lane of a single <4 x float>
; load, reassembled with insertelement.
; We cannot vectorize tan since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @tan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @tan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @tan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @tanf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @tanf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @tanf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @tanf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.tan.f32(float)
|
|
|
|
; int_tan_4x: four scalar @llvm.tan.f32 calls, one per lane of a single
; <4 x float> load, reassembled with insertelement.
; We cannot vectorize tan since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @int_tan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_tan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_tan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.tan.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.tan.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.tan.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.tan.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @atanf(float) readonly nounwind willreturn
|
|
|
|
; atan_4x: four scalar calls to @atanf, one per lane of a single <4 x float>
; load, reassembled with insertelement.
; We cannot vectorize atan since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @atan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @atan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @atan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @atanf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @atanf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @atanf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @atanf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.atan.f32(float)
|
|
|
|
; int_atan_4x: four scalar @llvm.atan.f32 calls, one per lane of a single
; <4 x float> load, reassembled with insertelement.
; We cannot vectorize atan since RISCV has no such instruction, so both RUN
; configurations expect the scalar calls to be left as they are.
|
|
define <4 x float> @int_atan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_atan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_atan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.atan.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.atan.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.atan.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.atan.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @sinhf(float) readonly nounwind willreturn
|
|
|
|
; The @sinhf calls cannot be vectorized since RISC-V has no such instruction, so
; the CHECK and DEFAULT expectations keep all four scalar calls unchanged.
|
|
define <4 x float> @sinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @sinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @sinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @sinhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @sinhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @sinhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @sinhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.sinh.f32(float)
|
|
|
|
; The @llvm.sinh.f32 calls cannot be vectorized since RISC-V has no such
; instruction, so the CHECK and DEFAULT expectations keep all four scalar calls
; unchanged.
|
|
define <4 x float> @int_sinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_sinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.sinh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @asinhf(float) readonly nounwind willreturn
|
|
|
|
; The @asinhf calls cannot be vectorized since RISC-V has no such instruction, so
; the CHECK and DEFAULT expectations keep all four scalar calls unchanged.
|
|
define <4 x float> @asinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @asinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @asinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @asinhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @asinhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @asinhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @asinhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.asinh.f32(float)
|
|
|
|
; The @llvm.asinh.f32 calls cannot be vectorized since RISC-V has no such
; instruction, so the CHECK and DEFAULT expectations keep all four scalar calls
; unchanged.
|
|
define <4 x float> @int_asinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_asinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.asinh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @coshf(float) readonly nounwind willreturn
|
|
|
|
; The @coshf calls cannot be vectorized since RISC-V has no such instruction, so
; the CHECK and DEFAULT expectations keep all four scalar calls unchanged.
|
|
define <4 x float> @cosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @cosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @cosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @coshf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @coshf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @coshf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @coshf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.cosh.f32(float)
|
|
|
|
; The @llvm.cosh.f32 calls cannot be vectorized since RISC-V has no such
; instruction, so the CHECK and DEFAULT expectations keep all four scalar calls
; unchanged.
|
|
define <4 x float> @int_cosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_cosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.cosh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @acoshf(float) readonly nounwind willreturn
|
|
|
|
; The @acoshf calls cannot be vectorized since RISC-V has no such instruction, so
; the CHECK and DEFAULT expectations keep all four scalar calls unchanged.
|
|
define <4 x float> @acosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @acosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @acosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @acoshf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @acoshf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @acoshf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @acoshf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.acosh.f32(float)
|
|
|
|
; The @llvm.acosh.f32 calls cannot be vectorized since RISC-V has no such
; instruction, so the CHECK and DEFAULT expectations keep all four scalar calls
; unchanged.
|
|
define <4 x float> @int_acosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_acosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.acosh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @tanhf(float) readonly nounwind willreturn
|
|
|
|
; The @tanhf calls cannot be vectorized since RISC-V has no such instruction, so
; the CHECK and DEFAULT expectations keep all four scalar calls unchanged.
|
|
define <4 x float> @tanh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @tanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @tanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @tanhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @tanhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @tanhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @tanhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.tanh.f32(float)
|
|
|
|
; We cannot vectorize tanh since RISC-V has no such instruction.
|
|
define <4 x float> @int_tanh_4x(ptr %a) {
|
|
; Both run configurations expect the output to keep the scalar form listed
; below: one <4 x float> load followed, for each of the four lanes, by an
; extractelement, a scalar @llvm.tanh.f32 call and an insertelement.
; CHECK-LABEL: define <4 x float> @int_tanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: each lane of the loaded vector is extracted, passed to a scalar
; @llvm.tanh.f32 call, and inserted into the <4 x float> value that is returned.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.tanh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @atanhf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize atanh since RISC-V has no such instruction.
|
|
define <4 x float> @atanh_4x(ptr %a) {
|
|
; Both run configurations expect the output to keep the scalar form listed
; below: one <4 x float> load followed, for each of the four lanes, by an
; extractelement, a scalar @atanhf call and an insertelement.
; CHECK-LABEL: define <4 x float> @atanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @atanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: each lane of the loaded vector is extracted, passed to a scalar
; @atanhf call, and inserted into the <4 x float> value that is returned.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @atanhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @atanhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @atanhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @atanhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.atanh.f32(float)
|
|
|
|
; We cannot vectorize atanh since RISC-V has no such instruction.
|
|
define <4 x float> @int_atanh_4x(ptr %a) {
|
|
; Both run configurations expect the output to keep the scalar form listed
; below: one <4 x float> load followed, for each of the four lanes, by an
; extractelement, a scalar @llvm.atanh.f32 call and an insertelement.
; CHECK-LABEL: define <4 x float> @int_atanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: each lane of the loaded vector is extracted, passed to a scalar
; @llvm.atanh.f32 call, and inserted into the <4 x float> value that is returned.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.atanh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|