AMDGPU has native instructions and target intrinsics for ldexp, but these really should be subject to legalization and generic optimizations. This will enable legalization of f16->f32 on targets without f16 support. Implement a somewhat horrible inline expansion for targets without libcall support. This could be better if we could introduce control flow (GlobalISel version not yet implemented). Support for strictfp legalization is less complete but works for the simple cases.
224 lines
7.6 KiB
LLVM
224 lines
7.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s

; Scalar float case: ldexp of the constant 1.0 by an i8 exponent that is
; zero-extended to i32.
; The expected output is a libcall to ldexpf. The exponent is copied from r3
; to r4, and the f1 argument is built from an integer splat of 1 (vspltisw)
; converted to floating point (xvcvsxwdp).
define float @ldexp_f32(i8 zeroext %x) {
; CHECK-LABEL: ldexp_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    std r0, 48(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    vspltisw v2, 1
; CHECK-NEXT:    mr r4, r3
; CHECK-NEXT:    xvcvsxwdp vs1, v2
; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
  %zext = zext i8 %x to i32
  %ldexp = call float @llvm.ldexp.f32.i32(float 1.000000e+00, i32 %zext)
  ret float %ldexp
}
; Scalar double case: ldexp of the constant 1.0 by an i8 exponent that is
; zero-extended to i32.
; The expected output matches the float case except that the libcall target
; is ldexp instead of ldexpf.
define double @ldexp_f64(i8 zeroext %x) {
; CHECK-LABEL: ldexp_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    std r0, 48(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    vspltisw v2, 1
; CHECK-NEXT:    mr r4, r3
; CHECK-NEXT:    xvcvsxwdp vs1, v2
; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; CHECK-NEXT:    bl ldexp
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
  %zext = zext i8 %x to i32
  %ldexp = call double @llvm.ldexp.f64.i32(double 1.000000e+00, i32 %zext)
  ret double %ldexp
}
; Vector <2 x float> case with a <2 x i32> exponent vector.
; The expected output contains four scalar ldexpf libcalls, the same sequence
; as the <4 x float> test in this file (presumably the operation is widened
; to four lanes before being scalarized; confirm against the legalizer).
; v28-v31 are spilled in the prologue and reloaded in the epilogue; they hold
; the input vectors and partial results across the calls.
define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
; CHECK-LABEL: ldexp_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    stdu r1, -96(r1)
; CHECK-NEXT:    std r0, 112(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset v28, -64
; CHECK-NEXT:    .cfi_offset v29, -48
; CHECK-NEXT:    .cfi_offset v30, -32
; CHECK-NEXT:    .cfi_offset v31, -16
; CHECK-NEXT:    li r3, 12
; CHECK-NEXT:    xscvspdpn f1, v2
; CHECK-NEXT:    stxv v28, 32(r1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv v29, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv v30, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv v31, 80(r1) # 16-byte Folded Spill
; CHECK-NEXT:    vmr v31, v3
; CHECK-NEXT:    vmr v30, v2
; CHECK-NEXT:    vextuwrx r4, r3, v3
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    xxswapd vs0, v30
; CHECK-NEXT:    li r3, 4
; CHECK-NEXT:    xscpsgndp v29, f1, f1
; CHECK-NEXT:    xscvspdpn f1, vs0
; CHECK-NEXT:    vextuwrx r4, r3, v31
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT:    xxmrghd vs0, v29, vs1
; CHECK-NEXT:    li r3, 0
; CHECK-NEXT:    vextuwrx r4, r3, v31
; CHECK-NEXT:    xvcvdpsp v28, vs0
; CHECK-NEXT:    xxsldwi vs0, v30, v30, 3
; CHECK-NEXT:    xscvspdpn f1, vs0
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    xxsldwi vs0, v30, v30, 1
; CHECK-NEXT:    xscpsgndp v29, f1, f1
; CHECK-NEXT:    mfvsrwz r4, v31
; CHECK-NEXT:    xscvspdpn f1, vs0
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT:    xxmrghd vs0, vs1, v29
; CHECK-NEXT:    lxv v31, 80(r1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv v30, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv v29, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT:    xvcvdpsp v2, vs0
; CHECK-NEXT:    vmrgew v2, v28, v2
; CHECK-NEXT:    lxv v28, 32(r1) # 16-byte Folded Reload
; CHECK-NEXT:    addi r1, r1, 96
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
  %1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp)
  ret <2 x float> %1
}
; Vector <4 x float> case with a <4 x i32> exponent vector.
; The expected output scalarizes the operation into four ldexpf libcalls.
; Each lane value is converted with xscvspdpn into f1, and each exponent is
; extracted into r4 (vextuwrx / mfvsrwz). The results are then recombined
; with xxmrghd, xvcvdpsp and vmrgew.
; v28-v31 are spilled in the prologue and reloaded in the epilogue; they hold
; the input vectors and partial results across the calls.
define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) {
; CHECK-LABEL: ldexp_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    stdu r1, -96(r1)
; CHECK-NEXT:    std r0, 112(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 96
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset v28, -64
; CHECK-NEXT:    .cfi_offset v29, -48
; CHECK-NEXT:    .cfi_offset v30, -32
; CHECK-NEXT:    .cfi_offset v31, -16
; CHECK-NEXT:    li r3, 12
; CHECK-NEXT:    xscvspdpn f1, v2
; CHECK-NEXT:    stxv v28, 32(r1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv v29, 48(r1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv v30, 64(r1) # 16-byte Folded Spill
; CHECK-NEXT:    stxv v31, 80(r1) # 16-byte Folded Spill
; CHECK-NEXT:    vmr v31, v3
; CHECK-NEXT:    vmr v30, v2
; CHECK-NEXT:    vextuwrx r4, r3, v3
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    xxswapd vs0, v30
; CHECK-NEXT:    li r3, 4
; CHECK-NEXT:    xscpsgndp v29, f1, f1
; CHECK-NEXT:    xscvspdpn f1, vs0
; CHECK-NEXT:    vextuwrx r4, r3, v31
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT:    xxmrghd vs0, v29, vs1
; CHECK-NEXT:    li r3, 0
; CHECK-NEXT:    vextuwrx r4, r3, v31
; CHECK-NEXT:    xvcvdpsp v28, vs0
; CHECK-NEXT:    xxsldwi vs0, v30, v30, 3
; CHECK-NEXT:    xscvspdpn f1, vs0
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    xxsldwi vs0, v30, v30, 1
; CHECK-NEXT:    xscpsgndp v29, f1, f1
; CHECK-NEXT:    mfvsrwz r4, v31
; CHECK-NEXT:    xscvspdpn f1, vs0
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT:    xxmrghd vs0, vs1, v29
; CHECK-NEXT:    lxv v31, 80(r1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv v30, 64(r1) # 16-byte Folded Reload
; CHECK-NEXT:    lxv v29, 48(r1) # 16-byte Folded Reload
; CHECK-NEXT:    xvcvdpsp v2, vs0
; CHECK-NEXT:    vmrgew v2, v28, v2
; CHECK-NEXT:    lxv v28, 32(r1) # 16-byte Folded Reload
; CHECK-NEXT:    addi r1, r1, 96
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
  %1 = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %val, <4 x i32> %exp)
  ret <4 x float> %1
}
; Half-precision case: there is no half libcall in the expected output, so
; the operation is carried out through ldexpf.
; The incoming value is round-tripped through half precision
; (xscvdphp, mask to 16 bits with clrlwi, xscvhpdp) before the call.
; NOTE(review): clrldi r4, r4, 32 clears the upper 32 bits of the exponent
; register, i.e. the i32 exponent is zero-extended for the libcall. Confirm
; this is the intended extension for a signed exponent.
; NOTE(review): no conversion of the libcall result is visible after the
; call; the value in f1 is returned as-is. Confirm that is expected for a
; half return value.
define half @ldexp_f16(half %arg0, i32 %arg1) {
; CHECK-LABEL: ldexp_f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    std r0, 48(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    xscvdphp f0, f1
; CHECK-NEXT:    clrldi r4, r4, 32
; CHECK-NEXT:    mffprwz r3, f0
; CHECK-NEXT:    clrlwi r3, r3, 16
; CHECK-NEXT:    mtfprwz f0, r3
; CHECK-NEXT:    xscvhpdp f1, f0
; CHECK-NEXT:    bl ldexpf
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
  %ldexp = call half @llvm.ldexp.f16.i32(half %arg0, i32 %arg1)
  ret half %ldexp
}
; ppc_fp128 case: the expected output is a direct libcall to ldexpl with the
; exponent passed in r5.
; NOTE(review): clrldi r5, r5, 32 clears the upper 32 bits of the exponent
; register, i.e. the i32 exponent is zero-extended for the libcall. Confirm
; this is the intended extension for a signed exponent.
define ppc_fp128 @ldexp_fp128(ppc_fp128 %arg0, i32 %arg1) {
; CHECK-LABEL: ldexp_fp128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    std r0, 48(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    clrldi r5, r5, 32
; CHECK-NEXT:    bl ldexpl
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
  %ldexp = call ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128 %arg0, i32 %arg1)
  ret ppc_fp128 %ldexp
}
; Declarations of the llvm.ldexp overloads called by the test functions in
; this file. Every declaration uses attribute group #0, defined below.
declare double @llvm.ldexp.f64.i32(double, i32) #0
declare float @llvm.ldexp.f32.i32(float, i32) #0
declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0
declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0
declare half @llvm.ldexp.f16.i32(half, i32) #0
declare ppc_fp128 @llvm.ldexp.ppcf128.i32(ppc_fp128, i32) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }