Files
clang-p2996/llvm/test/CodeGen/X86/ldexp-strict.ll
Matt Arsenault eece6ba283 IR: Add llvm.ldexp and llvm.experimental.constrained.ldexp intrinsics
AMDGPU has native instructions and target intrinsics for this, but
these really should be subject to legalization and generic
optimizations. This will enable legalization of f16->f32 on targets
without f16 support.

Implement a somewhat horrible inline expansion for targets without
libcall support. This could be better if we could introduce control
flow (GlobalISel version not yet implemented). Support for strictfp
legalization is less complete but works for the simple cases.
2023-06-06 17:07:18 -04:00

76 lines
3.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s
; XUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s
; FIXME: Expansion support without libcalls
; FIXME: Implement f16->f32 promotion for strictfp
; define half @test_strict_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #2 {
; %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; ret half %result
; }
; Scalar f32 case: a strictfp function (#2) calling the constrained ldexp
; intrinsic with an i32 exponent, dynamic rounding and strict FP exceptions.
; The X64 assertions below expect a call to ldexpf through the PLT, with the
; exponent moved from %esi into %edi before the call and a pushq/popq %rax
; pair bracketing it (CFA offset 16 while the call is live).
; NOTE(review): %out is never used in the body; presumably it only occupies
; the first integer argument register, which is why the exponent arrives in
; %esi - confirm before dropping it, since the assertions depend on it.
define float @test_strict_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) #2 {
; X64-LABEL: test_strict_ldexp_f32_i32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: movl %esi, %edi
; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: popq %rax
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call float @llvm.experimental.constrained.ldexp.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret float %result
}
; Scalar f64 case: same shape as the f32 test, but on double. The X64
; assertions below expect a call to ldexp (the double-precision entry point)
; through the PLT, again with the exponent moved from %esi into %edi and a
; pushq/popq %rax pair around the call.
; NOTE(review): %out is unused in the body; it appears to exist only to shift
; the exponent into the second integer argument register - confirm before
; removing it, as the expected movl depends on it.
define double @test_strict_ldexp_f64_i32(ptr addrspace(1) %out, double %a, i32 %b) #2 {
; X64-LABEL: test_strict_ldexp_f64_i32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: movl %esi, %edi
; X64-NEXT: callq ldexp@PLT
; X64-NEXT: popq %rax
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call double @llvm.experimental.constrained.ldexp.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret double %result
}
; Vector case: <2 x float> value with a <2 x i32> exponent vector, under the
; same dynamic-rounding / strict-exception constraints as the scalar tests.
; The X64 assertions below expect the operation to be split into two ldexpf
; calls, one per lane:
;   - both input vectors are spilled to a 56-byte stack frame up front;
;   - lane 0 passes the low element of the exponent vector in %edi;
;   - lane 1 reloads the inputs and selects element 1 of each ([1,1,1,1]
;     shuffles) before the second call;
;   - unpcklps interleaves the two scalar results into the returned vector.
; NOTE(review): %out is unused in the body, as in the scalar tests - confirm
; it is intentional before removing it.
define <2 x float> @test_strict_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) #2 {
; X64-LABEL: test_strict_ldexp_v2f32_v2i32:
; X64: # %bb.0:
; X64-NEXT: subq $56, %rsp
; X64-NEXT: .cfi_def_cfa_offset 64
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT: movd %xmm1, %edi
; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; X64-NEXT: # xmm1 = mem[1,1,1,1]
; X64-NEXT: movd %xmm1, %edi
; X64-NEXT: callq ldexpf@PLT
; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: addq $56, %rsp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
%result = call <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x float> %result
}
; Declarations of the constrained ldexp intrinsics, one per value/exponent
; type pair. Each takes the value, the exponent, and two metadata operands
; (the call sites in this file pass a rounding mode and an exception
; behaviour there).
; The f16 variant is referenced only by the commented-out half test near the
; top of the file.
declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1
declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) #1
declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) #1
; NOTE(review): no function visible in this file calls the x86_fp80 variant;
; presumably it is reserved for a future test - confirm before removing.
declare x86_fp80 @llvm.experimental.constrained.ldexp.f80.i32(x86_fp80, i32, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>, metadata, metadata) #1
; NOTE(review): attribute group #0 is not referenced by anything visible in
; this file.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; #1 is attached to every intrinsic declaration above.
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
; #2 marks each test function as strictfp.
attributes #2 = { strictfp }