The previous implementation could take extra time because it walked over the same instructions several times. It also did not include a proper analysis of cross-basic-block uses of the vectorized values. This version fixes both problems. It walks over the tree and checks the dependencies between entries and their operands. If there are non-vectorized calls in between, it adds a single(!) spill cost, because the vector value should be spilled/reloaded only once. This version also caches the analysis for each entry it detects and does not repeat it, reusing the data found during the analysis of previous nodes. It also has an internal limit: if the number of instructions between nodes and their operands is too big (greater than ScheduleRegionSizeBudget / VectorizableTree.size()), the spill is considered to be required. This improves compile time. Reviewers: preames, RKSimon, mikhailramalho Reviewed By: preames Pull Request: https://github.com/llvm/llvm-project/pull/129258
1800 lines
103 KiB
LLVM
1800 lines
103 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
|
|
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
|
|
; RUN: | FileCheck %s
|
|
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
|
|
; RUN: | FileCheck %s --check-prefix=DEFAULT
|
|
|
|
declare float @fabsf(float) readonly nounwind willreturn
|
|
|
|
define <4 x float> @fabs_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @fabs_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @fabs_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @fabsf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @fabsf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @fabsf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @fabsf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.fabs.f32(float)
|
|
|
|
define <4 x float> @int_fabs_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_fabs_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.fabs.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @sqrtf(float) readonly nounwind willreturn
|
|
|
|
define <4 x float> @sqrt_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @sqrt_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @sqrt_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @sqrtf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @sqrtf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @sqrtf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @sqrtf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.sqrt.f32(float)
|
|
|
|
define <4 x float> @int_sqrt_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_sqrt_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
|
|
; DEFAULT-NEXT: ret <4 x float> [[TMP1]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.sqrt.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @expf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize exp since RISCV has no such instruction.
|
|
define <4 x float> @exp_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @exp_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @exp_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @expf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @expf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @expf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @expf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.exp.f32(float)
|
|
|
|
; We cannot vectorize exp since RISCV has no such instruction.
|
|
define <4 x float> @int_exp_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_exp_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_exp_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.exp.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.exp.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.exp.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.exp.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @logf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize log since RISCV has no such instruction.
|
|
define <4 x float> @log_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @log_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @log_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @logf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @logf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @logf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @logf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.log.f32(float)
|
|
|
|
; We cannot vectorize log since RISCV has no such instruction.
|
|
define <4 x float> @int_log_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_log_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_log_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.log.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.log.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.log.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.log.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @sinf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize sin since RISCV has no such instruction.
|
|
define <4 x float> @sin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @sin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @sin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @sinf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @sinf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @sinf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @sinf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.sin.f32(float)
|
|
|
|
; We cannot vectorize sin since RISCV has no such instruction.
|
|
define <4 x float> @int_sin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_sin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_sin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.sin.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.sin.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.sin.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.sin.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @asinf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize asin since RISCV has no such instruction.
|
|
define <4 x float> @asin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @asin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @asin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @asinf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @asinf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @asinf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @asinf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.asin.f32(float)
|
|
|
|
; We cannot vectorize asin since RISCV has no such instruction.
|
|
define <4 x float> @int_asin_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_asin_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_asin_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar llvm.asin.f32 intrinsic call on each of
; the four extracted lanes, with the results reinserted lane by lane. Both assertion
; prefixes above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.asin.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.asin.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.asin.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.asin.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @cosf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize cos since RISCV has no such instruction.
|
|
define <4 x float> @cos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @cos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @cos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar cosf library call on each of the four
; extracted lanes, with the results reinserted lane by lane. Both assertion prefixes
; above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @cosf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @cosf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @cosf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @cosf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.cos.f32(float)
|
|
|
|
; We cannot vectorize cos since RISCV has no such instruction.
|
|
define <4 x float> @int_cos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_cos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_cos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar llvm.cos.f32 intrinsic call on each of
; the four extracted lanes, with the results reinserted lane by lane. Both assertion
; prefixes above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.cos.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.cos.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.cos.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.cos.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @acosf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize acos since RISCV has no such instruction.
|
|
define <4 x float> @acos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @acos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @acos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar acosf library call on each of the four
; extracted lanes, with the results reinserted lane by lane. Both assertion prefixes
; above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @acosf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @acosf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @acosf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @acosf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.acos.f32(float)
|
|
|
|
; We cannot vectorize acos since RISCV has no such instruction.
|
|
define <4 x float> @int_acos_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_acos_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_acos_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar llvm.acos.f32 intrinsic call on each of
; the four extracted lanes, with the results reinserted lane by lane. Both assertion
; prefixes above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.acos.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.acos.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.acos.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.acos.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @tanf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize tan since RISCV has no such instruction.
|
|
define <4 x float> @tan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @tan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @tan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar tanf library call on each of the four
; extracted lanes, with the results reinserted lane by lane. Both assertion prefixes
; above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @tanf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @tanf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @tanf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @tanf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.tan.f32(float)
|
|
|
|
; We cannot vectorize tan since RISCV has no such instruction.
|
|
define <4 x float> @int_tan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_tan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_tan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar llvm.tan.f32 intrinsic call on each of
; the four extracted lanes, with the results reinserted lane by lane. Both assertion
; prefixes above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.tan.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.tan.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.tan.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.tan.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @atanf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize atan since RISCV has no such instruction.
|
|
define <4 x float> @atan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @atan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @atan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar atanf library call on each of the four
; extracted lanes, with the results reinserted lane by lane. Both assertion prefixes
; above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @atanf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @atanf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @atanf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @atanf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.atan.f32(float)
|
|
|
|
; We cannot vectorize atan since RISCV has no such instruction.
|
|
define <4 x float> @int_atan_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_atan_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_atan_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR: one <4 x float> load, then a scalar llvm.atan.f32 intrinsic call on each of
; the four extracted lanes, with the results reinserted lane by lane. Both assertion
; prefixes above expect this scalar sequence to remain in the output (no vector call).
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.atan.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.atan.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.atan.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.atan.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @sinhf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize sinh since RISCV has no such instruction.
|
|
define <4 x float> @sinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @sinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @sinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @sinhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @sinhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @sinhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @sinhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.sinh.f32(float)
|
|
|
|
; We cannot vectorize sinh since RISCV has no such instruction.
|
|
define <4 x float> @int_sinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_sinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.sinh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @asinhf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize asinh since RISCV has no such instruction.
|
|
define <4 x float> @asinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @asinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @asinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @asinhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @asinhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @asinhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @asinhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.asinh.f32(float)
|
|
|
|
; We cannot vectorize asinh since RISCV has no such instruction.
|
|
define <4 x float> @int_asinh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_asinh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.asinh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @coshf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize cosh since RISCV has no such instruction.
|
|
define <4 x float> @cosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @cosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @cosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @coshf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @coshf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @coshf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @coshf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.cosh.f32(float)
|
|
|
|
; We cannot vectorize cosh since RISCV has no such instruction.
|
|
define <4 x float> @int_cosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_cosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.cosh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @acoshf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize acosh since RISCV has no such instruction.
|
|
define <4 x float> @acosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @acosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @acosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @acoshf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @acoshf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @acoshf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @acoshf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.acosh.f32(float)
|
|
|
|
; We cannot vectorize acosh since RISCV has no such instruction.
|
|
define <4 x float> @int_acosh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_acosh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.acosh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @tanhf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize tanh since RISCV has no such instruction.
|
|
define <4 x float> @tanh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @tanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @tanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @tanhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @tanhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @tanhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @tanhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.tanh.f32(float)
|
|
|
|
; We cannot vectorize tanh since RISC-V has no such instruction.
|
|
; int_tanh_4x loads a <4 x float> from %a and applies @llvm.tanh.f32 to each of
; the four lanes through an extractelement / call / insertelement chain, then
; returns the rebuilt vector.
; The assertions for both RUN configurations expect the four scalar intrinsic
; calls to remain in the output, i.e. no vector tanh call is formed.
define <4 x float> @int_tanh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_tanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR. The assertions above expect the same instruction sequence back.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.tanh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @atanhf(float) readonly nounwind willreturn
|
|
|
|
; We cannot vectorize atanh since RISC-V has no such instruction.
|
|
; atanh_4x loads a <4 x float> from %a and calls the libm-style function
; @atanhf on each of the four lanes through an extractelement / call /
; insertelement chain, then returns the rebuilt vector.
; The assertions for both RUN configurations expect the four scalar calls to
; remain in the output, i.e. no vector atanh call is formed.
define <4 x float> @atanh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @atanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @atanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR. The assertions above expect the same instruction sequence back.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @atanhf(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @atanhf(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @atanhf(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @atanhf(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
declare float @llvm.atanh.f32(float)
|
|
|
|
; We cannot vectorize atanh since RISC-V has no such instruction.
|
|
; int_atanh_4x loads a <4 x float> from %a and calls @llvm.atanh.f32 on each of
; the four lanes through an extractelement / call / insertelement chain, then
; returns the rebuilt vector.
; The assertions for both RUN configurations expect the four scalar calls to
; remain in the output, i.e. no vector atanh call is formed.
define <4 x float> @int_atanh_4x(ptr %a) {
|
|
; CHECK-LABEL: define <4 x float> @int_atanh_4x
|
|
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
|
|
; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
|
|
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
|
|
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
|
|
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
|
|
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: entry:
|
|
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
|
|
; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
|
|
; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
|
|
; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
|
|
; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
|
|
; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
|
|
; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
|
|
; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
|
|
; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
|
|
; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
|
|
; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
|
|
; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
|
|
; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
|
|
; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
|
|
;
|
|
; Input IR. The assertions above expect the same instruction sequence back.
entry:
|
|
%0 = load <4 x float>, ptr %a, align 16
|
|
%vecext = extractelement <4 x float> %0, i32 0
|
|
%1 = tail call fast float @llvm.atanh.f32(float %vecext)
|
|
%vecins = insertelement <4 x float> poison, float %1, i32 0
|
|
%vecext.1 = extractelement <4 x float> %0, i32 1
|
|
%2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
|
|
%vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
|
|
%vecext.2 = extractelement <4 x float> %0, i32 2
|
|
%3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
|
|
%vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
|
|
%vecext.3 = extractelement <4 x float> %0, i32 3
|
|
%4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
|
|
%vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
|
|
ret <4 x float> %vecins.3
|
|
}
|
|
|
|
; f loads %x0 and %x1 from two adjacent i64 slots of %p in the entry block and
; stores them to two adjacent i64 slots of %q in block baz. Control reaches baz
; through either foo or bar, and each of those blocks makes one scalar call to
; @fabsf whose result is unused, so %x0 and %x1 are live across a call on every
; path.
; The assertions for both RUN configurations expect the two loads and the two
; stores to stay scalar.
; NOTE(review): presumably this guards the SLP cost model's accounting for a
; vector value kept live across a call in another basic block -- confirm
; against the commit that introduced the test.
define void @f(i1 %c, ptr %p, ptr %q, ptr %r) {
|
|
; CHECK-LABEL: define void @f
|
|
; CHECK-SAME: (i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) #[[ATTR1]] {
|
|
; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 8
|
|
; CHECK-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P]], i64 1
|
|
; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 8
|
|
; CHECK-NEXT: br i1 [[C]], label [[FOO:%.*]], label [[BAR:%.*]]
|
|
; CHECK: foo:
|
|
; CHECK-NEXT: [[Y0:%.*]] = load float, ptr [[R]], align 4
|
|
; CHECK-NEXT: [[Y1:%.*]] = call float @fabsf(float [[Y0]])
|
|
; CHECK-NEXT: br label [[BAZ:%.*]]
|
|
; CHECK: bar:
|
|
; CHECK-NEXT: [[Z0:%.*]] = load float, ptr [[R]], align 4
|
|
; CHECK-NEXT: [[Z1:%.*]] = call float @fabsf(float [[Z0]])
|
|
; CHECK-NEXT: br label [[BAZ]]
|
|
; CHECK: baz:
|
|
; CHECK-NEXT: store i64 [[X0]], ptr [[Q]], align 8
|
|
; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[Q]], i64 1
|
|
; CHECK-NEXT: store i64 [[X1]], ptr [[Q1]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; DEFAULT-LABEL: define void @f
|
|
; DEFAULT-SAME: (i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) #[[ATTR1]] {
|
|
; DEFAULT-NEXT: [[X0:%.*]] = load i64, ptr [[P]], align 8
|
|
; DEFAULT-NEXT: [[P1:%.*]] = getelementptr i64, ptr [[P]], i64 1
|
|
; DEFAULT-NEXT: [[X1:%.*]] = load i64, ptr [[P1]], align 8
|
|
; DEFAULT-NEXT: br i1 [[C]], label [[FOO:%.*]], label [[BAR:%.*]]
|
|
; DEFAULT: foo:
|
|
; DEFAULT-NEXT: [[Y0:%.*]] = load float, ptr [[R]], align 4
|
|
; DEFAULT-NEXT: [[Y1:%.*]] = call float @fabsf(float [[Y0]])
|
|
; DEFAULT-NEXT: br label [[BAZ:%.*]]
|
|
; DEFAULT: bar:
|
|
; DEFAULT-NEXT: [[Z0:%.*]] = load float, ptr [[R]], align 4
|
|
; DEFAULT-NEXT: [[Z1:%.*]] = call float @fabsf(float [[Z0]])
|
|
; DEFAULT-NEXT: br label [[BAZ]]
|
|
; DEFAULT: baz:
|
|
; DEFAULT-NEXT: store i64 [[X0]], ptr [[Q]], align 8
|
|
; DEFAULT-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[Q]], i64 1
|
|
; DEFAULT-NEXT: store i64 [[X1]], ptr [[Q1]], align 8
|
|
; DEFAULT-NEXT: ret void
|
|
;
|
|
; Input IR. The two adjacent i64 loads below are defined before the branch.
%x0 = load i64, ptr %p
|
|
%p1 = getelementptr i64, ptr %p, i64 1
|
|
%x1 = load i64, ptr %p1
|
|
br i1 %c, label %foo, label %bar
|
|
foo:
|
|
%y0 = load float, ptr %r
|
|
%y1 = call float @fabsf(float %y0)
|
|
br label %baz
|
|
bar:
|
|
%z0 = load float, ptr %r
|
|
%z1 = call float @fabsf(float %z0)
|
|
br label %baz
|
|
; The only uses of %x0 and %x1 are the two adjacent stores in this block.
baz:
|
|
store i64 %x0, ptr %q
|
|
%q1 = getelementptr i64, ptr %q, i64 1
|
|
store i64 %x1, ptr %q1
|
|
|
|
ret void
|
|
}
|