Files
clang-p2996/llvm/test/CodeGen/ARM/llvm.exp10.ll
Matt Arsenault b14e83d1a4 IR: Add llvm.exp10 intrinsic
We currently have log, log2, log10, exp and exp2 intrinsics. Add exp10
to fix this asymmetry. AMDGPU already has most of the code for f32
exp10 expansion implemented alongside exp, so the current
implementation is duplicating nearly identical effort between the
compiler and library which is inconvenient.

https://reviews.llvm.org/D157871
2023-09-01 19:45:03 -04:00

319 lines
9.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=thumbv7-unknown-linux < %s | FileCheck -check-prefixes=CHECK %s
; Declarations of the llvm.exp10 intrinsic overloads exercised by this file:
; the scalar form plus <1 x>, <2 x>, <3 x> and <4 x> vector forms for each of
; half, float and double.
declare half @llvm.exp10.f16(half)
declare <1 x half> @llvm.exp10.v1f16(<1 x half>)
declare <2 x half> @llvm.exp10.v2f16(<2 x half>)
declare <3 x half> @llvm.exp10.v3f16(<3 x half>)
declare <4 x half> @llvm.exp10.v4f16(<4 x half>)
declare float @llvm.exp10.f32(float)
declare <1 x float> @llvm.exp10.v1f32(<1 x float>)
declare <2 x float> @llvm.exp10.v2f32(<2 x float>)
declare <3 x float> @llvm.exp10.v3f32(<3 x float>)
declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
declare double @llvm.exp10.f64(double)
declare <1 x double> @llvm.exp10.v1f64(<1 x double>)
declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
declare <3 x double> @llvm.exp10.v3f64(<3 x double>)
declare <4 x double> @llvm.exp10.v4f64(<4 x double>)
; Scalar half case. The expected output wraps a call to exp10f between calls
; to __gnu_h2f_ieee (before) and __gnu_f2h_ieee (after), i.e. there is no
; half-precision library call; the float routine is used.
define half @exp10_f16(half %x) {
; CHECK-LABEL: exp10_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: pop {r7, pc}
%r = call half @llvm.exp10.f16(half %x)
ret half %r
}
; <1 x half> case. The expected output calls __gnu_f2h_ieee, then
; __gnu_h2f_ieee, then exp10f, and returns without a further conversion call.
; NOTE(review): this helper order differs from exp10_f16 (h2f, exp10f, f2h).
; The assertions are autogenerated, so this records what llc emitted; whether
; that lowering is the intended one should be confirmed separately.
define <1 x half> @exp10_v1f16(<1 x half> %x) {
; CHECK-LABEL: exp10_v1f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: pop {r7, pc}
%r = call <1 x half> @llvm.exp10.v1f16(<1 x half> %x)
ret <1 x half> %r
}
; <2 x half> case. The expected output runs the h2f / exp10f / f2h call
; sequence twice, once per element. r4 keeps the incoming r1 across the first
; sequence and r5 keeps the first result until both are moved into r0/r1 for
; the return.
define <2 x half> @exp10_v2f16(<2 x half> %x) {
; CHECK-LABEL: exp10_v2f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: pop {r4, r5, r7, pc}
%r = call <2 x half> @llvm.exp10.v2f16(<2 x half> %x)
ret <2 x half> %r
}
; <3 x half> case. The expected output runs the h2f / exp10f / f2h call
; sequence three times, taking inputs from r0, r1 and r2 (the latter two
; saved in r6 and r4) and leaving the results in r0, r1 and r2.
define <3 x half> @exp10_v3f16(<3 x half> %x) {
; CHECK-LABEL: exp10_v3f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: mov r4, r2
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: pop {r4, r5, r6, pc}
%r = call <3 x half> @llvm.exp10.v3f16(<3 x half> %x)
ret <3 x half> %r
}
; <4 x half> case. The expected output runs the h2f / exp10f / f2h call
; sequence four times, taking inputs from r0-r3 (r1-r3 saved in r7, r6, r4)
; and leaving the results in r0-r3.
define <4 x half> @exp10_v4f16(<4 x half> %x) {
; CHECK-LABEL: exp10_v4f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
%r = call <4 x half> @llvm.exp10.v4f16(<4 x half> %x)
ret <4 x half> %r
}
; Scalar float case. The entire expected body is a single branch (`b`) to
; exp10f, with no push/pop and no `bl`.
define float @exp10_f32(float %x) {
; CHECK-LABEL: exp10_f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: b exp10f
%r = call float @llvm.exp10.f32(float %x)
ret float %r
}
; <1 x float> case. Unlike the scalar float test, the expected output calls
; exp10f with `bl` inside a push/pop pair instead of branching to it.
define <1 x float> @exp10_v1f32(<1 x float> %x) {
; CHECK-LABEL: exp10_v1f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: pop {r7, pc}
%r = call <1 x float> @llvm.exp10.v1f32(<1 x float> %x)
ret <1 x float> %r
}
; <2 x float> case. The expected output places the incoming r0/r1 pair in d8
; and calls exp10f twice: first on lane s17, then on lane s16. The s17 result
; is held in r4 and returned in r1; the s16 result is returned in r0.
define <2 x float> @exp10_v2f32(<2 x float> %x) {
; CHECK-LABEL: exp10_v2f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, pc}
%r = call <2 x float> @llvm.exp10.v2f32(<2 x float> %x)
ret <2 x float> %r
}
; <3 x float> case. The expected output makes three calls to exp10f (on lane
; s2, then on the saved r1, then on the saved r0), collects the results in
; s16-s18, and returns them through r0-r3 via d8 and d9.
define <3 x float> @exp10_v3f32(<3 x float> %x) {
; CHECK-LABEL: exp10_v3f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov d1, r2, r3
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: vmov s17, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: vmov s16, r0
; CHECK-NEXT: vmov s18, r6
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, pc}
%r = call <3 x float> @llvm.exp10.v3f32(<3 x float> %x)
ret <3 x float> %r
}
; <4 x float> case. The expected output makes four calls to exp10f, one per
; incoming register r0-r3 (in the order r1, r3, r2, r0), collects the results
; in s16-s19, and returns them through r0-r3 via d8 and d9.
define <4 x float> @exp10_v4f32(<4 x float> %x) {
; CHECK-LABEL: exp10_v4f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: vmov s19, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: vmov s18, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: vmov s17, r7
; CHECK-NEXT: bl exp10f
; CHECK-NEXT: vmov s16, r0
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
%r = call <4 x float> @llvm.exp10.v4f32(<4 x float> %x)
ret <4 x float> %r
}
; Scalar double case. The entire expected body is a single branch (`b`) to
; exp10, with no push/pop and no `bl`.
define double @exp10_f64(double %x) {
; CHECK-LABEL: exp10_f64:
; CHECK: @ %bb.0:
; CHECK-NEXT: b exp10
%r = call double @llvm.exp10.f64(double %x)
ret double %r
}
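; FIXME: Broken. The <1 x double> test below is commented out, so the
; @llvm.exp10.v1f64 declaration at the top of this file is currently unused.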
; define <1 x double> @exp10_v1f64(<1 x double> %x) {
; %r = call <1 x double> @llvm.exp10.v1f64(<1 x double> %x)
; ret <1 x double> %r
; }
; <2 x double> case. The expected output calls exp10 twice: first on the
; incoming r0/r1 pair, then on the r2/r3 pair (saved in r5/r4). The first
; result is held in r6/r7 and the two results are returned in r0-r3.
define <2 x double> @exp10_v2f64(<2 x double> %x) {
; CHECK-LABEL: exp10_v2f64:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: bl exp10
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl exp10
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r7
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
%r = call <2 x double> @llvm.exp10.v2f64(<2 x double> %x)
ret <2 x double> %r
}
; <3 x double> case. The expected output calls exp10 three times: on the
; incoming r2/r3 pair and on two values loaded from the stack ([sp, #24] and
; [sp, #32]). The results are written to memory through the pointer copied
; from r0 into r4 (first two via vst1.64 with post-increment, third via strd).
define <3 x double> @exp10_v3f64(<3 x double> %x) {
; CHECK-LABEL: exp10_v3f64:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl exp10
; CHECK-NEXT: ldrd r2, r3, [sp, #24]
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl exp10
; CHECK-NEXT: ldrd r2, r3, [sp, #32]
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: vst1.64 {d8, d9}, [r4:128]!
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl exp10
; CHECK-NEXT: strd r0, r1, [r4]
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, pc}
%r = call <3 x double> @llvm.exp10.v3f64(<3 x double> %x)
ret <3 x double> %r
}
; <4 x double> case. The expected output calls exp10 four times: on the
; incoming r2/r3 pair and on three values read from the stack (one via ldrd
; at [sp, #56], two via a vld1.64 at sp+64). The results are gathered in
; d8-d11 and written to memory through the pointer copied from r0 into r4
; using two vst1.64 stores.
define <4 x double> @exp10_v4f64(<4 x double> %x) {
; CHECK-LABEL: exp10_v4f64:
; CHECK: @ %bb.0:
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl exp10
; CHECK-NEXT: add r2, sp, #64
; CHECK-NEXT: vmov d8, r0, r1
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: vmov r5, r8, d16
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl exp10
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: ldrd r0, r1, [sp, #56]
; CHECK-NEXT: bl exp10
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: vmov d11, r7, r6
; CHECK-NEXT: bl exp10
; CHECK-NEXT: vmov d10, r0, r1
; CHECK-NEXT: vst1.64 {d8, d9}, [r4:128]!
; CHECK-NEXT: vst1.64 {d10, d11}, [r4:128]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
%r = call <4 x double> @llvm.exp10.v4f64(<4 x double> %x)
ret <4 x double> %r
}