Files
clang-p2996/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
David Green 2ec3ca7477 [ARM] Extend IsCMPZCSINC to handle CMOV
A 'CMOV 1, 0, CC, %cpsr, Cmp' is the same as a 'CSINC 0, 0, CC, Cmp',
and can be treated the same in IsCMPZCSINC added in D114013. This allows
us to remove the unnecessary CMOV in the same way that we could remove a
CSINC.

Differential Revision: https://reviews.llvm.org/D115188
2021-12-27 14:15:03 +00:00

4777 lines
174 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=armv7a-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
; RUN: llc < %s -mtriple=armv8a-none-eabihf -mattr=+neon,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
; i32 saturate
; stest_f64i32: signed saturating conversion of <2 x double> to <2 x i32>,
; written as fptosi to i64 followed by an explicit compare/select clamp to
; [-2147483648, 2147483647] and a truncate. The expected ARM output below
; performs one __aeabi_d2lz call per lane and does the clamp with 64-bit
; subs/sbcs compares feeding NEON bit-select instructions.
define <2 x i32> @stest_f64i32(<2 x double> %x) {
; CHECK-LABEL: stest_f64i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: adr r2, .LCPI0_0
; CHECK-NEXT: vld1.64 {d8, d9}, [r2:128]
; CHECK-NEXT: vmov.32 d10[0], r4
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: subs r4, r4, r3
; CHECK-NEXT: sbcs r4, r5, #0
; CHECK-NEXT: vmov.32 d11[0], r0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vmov.32 d11[1], r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmov.i32 q10, #0x80000000
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: vmov.32 d10[1], r5
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d17, r0
; CHECK-NEXT: vdup.32 d16, r4
; CHECK-NEXT: mvn r4, #0
; CHECK-NEXT: vbsl q8, q5, q4
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r3, r5, d17
; CHECK-NEXT: rsbs r0, r0, #-2147483648
; CHECK-NEXT: sbcs r0, r4, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: rsbs r1, r3, #-2147483648
; CHECK-NEXT: sbcs r1, r4, r5
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vdup.32 d19, r2
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: vbif q8, q10, q9
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-NEXT: .long 0 @ 0x0
entry:
; Widen first: convert each double lane to a signed 64-bit integer.
%conv = fptosi <2 x double> %x to <2 x i64>
; Upper clamp: keep the lane if it is signed-less-than 2147483647, else use 2147483647.
%0 = icmp slt <2 x i64> %conv, <i64 2147483647, i64 2147483647>
%spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>
; Lower clamp: keep the lane if it is signed-greater-than -2147483648, else use -2147483648.
%1 = icmp sgt <2 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648>
%spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
ret <2 x i32> %conv6
}
; utest_f64i32: unsigned saturating conversion of <2 x double> to <2 x i32>,
; written as fptoui to i64, an unsigned compare/select clamp to at most
; 4294967295, and a truncate. The expected ARM output below performs one
; __aeabi_d2ulz call per lane and builds the clamp from vand/vorn on a
; per-lane all-ones/all-zeros mask.
define <2 x i32> @utest_f64i32(<2 x double> %x) {
; CHECK-LABEL: utest_f64i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vmov.32 d9[0], r4
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: mvn r3, #0
; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: subs r1, r4, r3
; CHECK-NEXT: sbcs r1, r5, #0
; CHECK-NEXT: movwlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vdup.32 d17, r2
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d16, r0
; CHECK-NEXT: vand q9, q4, q8
; CHECK-NEXT: vorn q8, q9, q8
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r11, pc}
entry:
; Widen first: convert each double lane to an unsigned 64-bit integer.
%conv = fptoui <2 x double> %x to <2 x i64>
; Upper clamp only (unsigned): keep the lane if it is below 4294967295, else use 4294967295.
%0 = icmp ult <2 x i64> %conv, <i64 4294967295, i64 4294967295>
%spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
ret <2 x i32> %conv6
}
; ustest_f64i32: conversion of <2 x double> to <2 x i32> that goes through a
; *signed* i64 (fptosi) and then clamps to the unsigned 32-bit range
; [0, 4294967295] with signed compares before truncating. The expected ARM
; output below performs one __aeabi_d2lz call per lane.
define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov.32 d8[0], r4
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mvn r3, #0
; CHECK-NEXT: subs r4, r4, r3
; CHECK-NEXT: sbcs r4, r5, #0
; CHECK-NEXT: vmov.32 d9[0], r0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: vmov.i64 q9, #0xffffffff
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: subs r0, r0, r3
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vmov.32 d9[1], r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: vmov.32 d8[1], r5
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d17, r0
; CHECK-NEXT: vdup.32 d16, r4
; CHECK-NEXT: vbsl q8, q4, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r3, r5, d17
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: rsbs r1, r3, #0
; CHECK-NEXT: rscs r1, r5, #0
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov.32 d19[0], r2
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vmov.32 d18[0], r0
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r11, pc}
entry:
; Widen first: convert each double lane to a signed 64-bit integer.
%conv = fptosi <2 x double> %x to <2 x i64>
; Upper clamp: keep the lane if it is signed-less-than 4294967295, else use 4294967295.
%0 = icmp slt <2 x i64> %conv, <i64 4294967295, i64 4294967295>
%spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
; Lower clamp: keep the lane if it is signed-greater-than zero, else use zero.
%1 = icmp sgt <2 x i64> %spec.store.select, zeroinitializer
%spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> zeroinitializer
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
ret <2 x i32> %conv6
}
; stest_f32i32: signed saturating conversion of <4 x float> to <4 x i32>,
; written as fptosi to i64 followed by an explicit compare/select clamp to
; [-2147483648, 2147483647] and a truncate. The expected ARM output below
; performs one __aeabi_f2lz call per lane and loads both clamp bounds from
; constant-pool entries (.LCPI3_0 and .LCPI3_1).
define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-LABEL: stest_f32i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: vmov r6, s17
; CHECK-NEXT: vmov r10, s19
; CHECK-NEXT: vmov.32 d8[0], r7
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov.32 d10[0], r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: vmov.32 d9[0], r0
; CHECK-NEXT: mov r0, r10
; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mvn r6, #-2147483648
; CHECK-NEXT: subs r3, r7, r6
; CHECK-NEXT: sbcs r3, r8, #0
; CHECK-NEXT: vmov.32 d11[0], r0
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: adr r2, .LCPI3_0
; CHECK-NEXT: movwlt r3, #1
; CHECK-NEXT: subs r7, r5, r6
; CHECK-NEXT: sbcs r7, r4, #0
; CHECK-NEXT: vmov.32 d11[1], r1
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mvnne r7, #0
; CHECK-NEXT: subs r0, r0, r6
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vmov.32 d10[1], r4
; CHECK-NEXT: vdup.32 d17, r0
; CHECK-NEXT: subs r0, r9, r6
; CHECK-NEXT: sbcs r0, r11, #0
; CHECK-NEXT: vdup.32 d16, r7
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vbsl q8, q5, q9
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov.32 d9[1], r11
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mvn r6, #0
; CHECK-NEXT: vdup.32 d21, r0
; CHECK-NEXT: mvnne r3, #0
; CHECK-NEXT: vmov.32 d8[1], r8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vdup.32 d20, r3
; CHECK-NEXT: vbit q9, q4, q10
; CHECK-NEXT: adr r5, .LCPI3_1
; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128]
; CHECK-NEXT: vmov r5, r4, d17
; CHECK-NEXT: vmov r3, r7, d18
; CHECK-NEXT: rsbs r0, r0, #-2147483648
; CHECK-NEXT: sbcs r0, r6, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: rsbs r1, r3, #-2147483648
; CHECK-NEXT: vmov r1, r3, d19
; CHECK-NEXT: sbcs r7, r6, r7
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: rsbs r5, r5, #-2147483648
; CHECK-NEXT: sbcs r5, r6, r4
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: rsbs r1, r1, #-2147483648
; CHECK-NEXT: sbcs r1, r6, r3
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: mvnne r5, #0
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vdup.32 d25, r5
; CHECK-NEXT: mvnne r7, #0
; CHECK-NEXT: vdup.32 d23, r2
; CHECK-NEXT: vdup.32 d24, r0
; CHECK-NEXT: vbif q8, q10, q12
; CHECK-NEXT: vdup.32 d22, r7
; CHECK-NEXT: vbif q9, q10, q11
; CHECK-NEXT: vmovn.i64 d1, q8
; CHECK-NEXT: vmovn.i64 d0, q9
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI3_0:
; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .LCPI3_1:
; CHECK-NEXT: .long 2147483648 @ 0x80000000
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 2147483648 @ 0x80000000
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
entry:
; Widen first: convert each float lane to a signed 64-bit integer.
%conv = fptosi <4 x float> %x to <4 x i64>
; Upper clamp: keep the lane if it is signed-less-than 2147483647, else use 2147483647.
%0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
%spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
; Lower clamp: keep the lane if it is signed-greater-than -2147483648, else use -2147483648.
%1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
%spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; utest_f32i32: unsigned saturating conversion of <4 x float> to <4 x i32>,
; written as fptoui to i64, an unsigned compare/select clamp to at most
; 4294967295, and a truncate. The expected ARM output below performs one
; __aeabi_f2ulz call per lane and builds the clamp from vand/vorn on
; per-lane all-ones/all-zeros masks.
define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r9, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: vmov r6, s19
; CHECK-NEXT: vmov r7, s18
; CHECK-NEXT: vmov.32 d9[0], r9
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vmov.32 d11[0], r0
; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mvn r7, #0
; CHECK-NEXT: subs r2, r5, r7
; CHECK-NEXT: sbcs r2, r4, #0
; CHECK-NEXT: vmov.32 d10[0], r0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: movwlo r2, #1
; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlo r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: subs r1, r6, r7
; CHECK-NEXT: sbcs r1, r10, #0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movwlo r1, #1
; CHECK-NEXT: subs r7, r9, r7
; CHECK-NEXT: sbcs r7, r8, #0
; CHECK-NEXT: movwlo r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mvnne r3, #0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vdup.32 d19, r1
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d17, r3
; CHECK-NEXT: vdup.32 d18, r0
; CHECK-NEXT: vand q10, q5, q9
; CHECK-NEXT: vdup.32 d16, r2
; CHECK-NEXT: vand q11, q4, q8
; CHECK-NEXT: vorn q9, q10, q9
; CHECK-NEXT: vorn q8, q11, q8
; CHECK-NEXT: vmovn.i64 d1, q9
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
; Widen first: convert each float lane to an unsigned 64-bit integer.
%conv = fptoui <4 x float> %x to <4 x i64>
; Upper clamp only (unsigned): keep the lane if it is below 4294967295, else use 4294967295.
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
ret <4 x i32> %conv6
}
; ustest_f32i32: conversion of <4 x float> to <4 x i32> that goes through a
; *signed* i64 (fptosi) and then clamps to the unsigned 32-bit range
; [0, 4294967295] with signed compares before truncating. The expected ARM
; output below performs one __aeabi_f2lz call per lane.
define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov.32 d16[0], r2
; CHECK-NEXT: mvn r4, #0
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: vmov r8, s19
; CHECK-NEXT: sbcs r2, r1, #0
; CHECK-NEXT: vmov.32 d17[0], r5
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: vmov.i64 q5, #0xffffffff
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: subs r3, r5, r4
; CHECK-NEXT: sbcs r3, r6, #0
; CHECK-NEXT: vmov.32 d17[1], r6
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mvnne r3, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vdup.32 d19, r3
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d18, r2
; CHECK-NEXT: vmov.32 d16[1], r1
; CHECK-NEXT: vorr q4, q9, q9
; CHECK-NEXT: vbsl q4, q8, q5
; CHECK-NEXT: vmov r10, r9, d8
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: vmov.32 d12[0], r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r6, r1
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: subs r2, r5, r4
; CHECK-NEXT: vmov.32 d13[0], r0
; CHECK-NEXT: sbcs r2, r6, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: subs r0, r0, r4
; CHECK-NEXT: sbcs r0, r1, #0
; CHECK-NEXT: vmov.32 d13[1], r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmov r5, r4, d9
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov.32 d12[1], r6
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d17, r0
; CHECK-NEXT: rsbs r0, r10, #0
; CHECK-NEXT: vdup.32 d16, r2
; CHECK-NEXT: rscs r0, r9, #0
; CHECK-NEXT: vbsl q8, q6, q5
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: vmov r1, r2, d16
; CHECK-NEXT: vmov r3, r6, d17
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: rscs r1, r2, #0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movwlt r1, #1
; CHECK-NEXT: rsbs r2, r3, #0
; CHECK-NEXT: rscs r2, r6, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwlt r2, #1
; CHECK-NEXT: rsbs r3, r5, #0
; CHECK-NEXT: rscs r3, r4, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mvnne r7, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: vmov.32 d21[0], r2
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov.32 d20[0], r1
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vmov.32 d19[0], r7
; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vmov.32 d18[0], r0
; CHECK-NEXT: vmovn.i64 d1, q8
; CHECK-NEXT: vand q9, q4, q9
; CHECK-NEXT: vmovn.i64 d0, q9
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
; Widen first: convert each float lane to a signed 64-bit integer.
%conv = fptosi <4 x float> %x to <4 x i64>
; Upper clamp: keep the lane if it is signed-less-than 4294967295, else use 4294967295.
%0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; Lower clamp: keep the lane if it is signed-greater-than zero, else use zero.
%1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
%spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; stest_f16i32: signed saturating conversion of <4 x half> to <4 x i32>,
; written as fptosi to i64 followed by an explicit compare/select clamp to
; [-2147483648, 2147483647] and a truncate. The two RUN configurations at the
; top of the file produce different code here, so each has its own block of
; expectations: the armv7a +neon run extends each lane with __aeabi_h2f and
; then calls __aeabi_f2lz, while the armv8a +fullfp16 run calls __fixhfdi
; directly on each half lane.
define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s3
; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s20, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r9, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: vmov r10, s16
; CHECK-NEON-NEXT: mov r8, r1
; CHECK-NEON-NEXT: vmov r6, s20
; CHECK-NEON-NEXT: vmov.32 d8[0], r9
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov.32 d10[0], r0
; CHECK-NEON-NEXT: mov r0, r6
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r11, r0
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: mov r0, r10
; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mvn r6, #-2147483648
; CHECK-NEON-NEXT: subs r3, r9, r6
; CHECK-NEON-NEXT: sbcs r3, r8, #0
; CHECK-NEON-NEXT: vmov.32 d11[0], r0
; CHECK-NEON-NEXT: mov r3, #0
; CHECK-NEON-NEXT: adr r2, .LCPI6_0
; CHECK-NEON-NEXT: movwlt r3, #1
; CHECK-NEON-NEXT: subs r5, r5, r6
; CHECK-NEON-NEXT: sbcs r5, r4, #0
; CHECK-NEON-NEXT: vmov.32 d11[1], r1
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movwlt r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: mvnne r5, #0
; CHECK-NEON-NEXT: subs r0, r0, r6
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: vmov.32 d10[1], r4
; CHECK-NEON-NEXT: vdup.32 d17, r0
; CHECK-NEON-NEXT: subs r0, r11, r6
; CHECK-NEON-NEXT: sbcs r0, r7, #0
; CHECK-NEON-NEXT: vdup.32 d16, r5
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: vbsl q8, q5, q9
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: vmov.32 d9[1], r7
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mvn r6, #0
; CHECK-NEON-NEXT: vdup.32 d21, r0
; CHECK-NEON-NEXT: mvnne r3, #0
; CHECK-NEON-NEXT: vmov.32 d8[1], r8
; CHECK-NEON-NEXT: vmov r0, r1, d16
; CHECK-NEON-NEXT: vdup.32 d20, r3
; CHECK-NEON-NEXT: vbit q9, q4, q10
; CHECK-NEON-NEXT: adr r5, .LCPI6_1
; CHECK-NEON-NEXT: vld1.64 {d20, d21}, [r5:128]
; CHECK-NEON-NEXT: vmov r5, r4, d17
; CHECK-NEON-NEXT: vmov r3, r7, d18
; CHECK-NEON-NEXT: rsbs r0, r0, #-2147483648
; CHECK-NEON-NEXT: sbcs r0, r6, r1
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: rsbs r1, r3, #-2147483648
; CHECK-NEON-NEXT: vmov r1, r3, d19
; CHECK-NEON-NEXT: sbcs r7, r6, r7
; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: movwlt r7, #1
; CHECK-NEON-NEXT: rsbs r5, r5, #-2147483648
; CHECK-NEON-NEXT: sbcs r5, r6, r4
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movwlt r5, #1
; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648
; CHECK-NEON-NEXT: sbcs r1, r6, r3
; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: mvnne r2, #0
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: mvnne r5, #0
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: vdup.32 d25, r5
; CHECK-NEON-NEXT: mvnne r7, #0
; CHECK-NEON-NEXT: vdup.32 d23, r2
; CHECK-NEON-NEXT: vdup.32 d24, r0
; CHECK-NEON-NEXT: vbif q8, q10, q12
; CHECK-NEON-NEXT: vdup.32 d22, r7
; CHECK-NEON-NEXT: vbif q9, q10, q11
; CHECK-NEON-NEXT: vmovn.i64 d1, q8
; CHECK-NEON-NEXT: vmovn.i64 d0, q9
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEON-NEXT: .p2align 4
; CHECK-NEON-NEXT: @ %bb.1:
; CHECK-NEON-NEXT: .LCPI6_0:
; CHECK-NEON-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-NEON-NEXT: .long 0 @ 0x0
; CHECK-NEON-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-NEON-NEXT: .long 0 @ 0x0
; CHECK-NEON-NEXT: .LCPI6_1:
; CHECK-NEON-NEXT: .long 2147483648 @ 0x80000000
; CHECK-NEON-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEON-NEXT: .long 2147483648 @ 0x80000000
; CHECK-NEON-NEXT: .long 4294967295 @ 0xffffffff
;
; CHECK-FP16-LABEL: stest_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13}
; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13}
; CHECK-FP16-NEXT: .vsave {d8}
; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[0]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov.u16 r6, d0[1]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
; CHECK-FP16-NEXT: mov r8, r1
; CHECK-FP16-NEXT: vmov.32 d10[0], r4
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: mov r7, r1
; CHECK-FP16-NEXT: vmov.32 d12[0], r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r9, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[3]
; CHECK-FP16-NEXT: mov r10, r1
; CHECK-FP16-NEXT: vmov.32 d11[0], r9
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mvn r6, #-2147483648
; CHECK-FP16-NEXT: subs r3, r4, r6
; CHECK-FP16-NEXT: sbcs r3, r8, #0
; CHECK-FP16-NEXT: vmov.32 d13[0], r0
; CHECK-FP16-NEXT: mov r3, #0
; CHECK-FP16-NEXT: adr r2, .LCPI6_0
; CHECK-FP16-NEXT: movwlt r3, #1
; CHECK-FP16-NEXT: subs r5, r5, r6
; CHECK-FP16-NEXT: sbcs r5, r7, #0
; CHECK-FP16-NEXT: vmov.32 d13[1], r1
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlt r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: mvnne r5, #0
; CHECK-FP16-NEXT: subs r0, r0, r6
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: vmov.32 d12[1], r7
; CHECK-FP16-NEXT: vdup.32 d17, r0
; CHECK-FP16-NEXT: subs r0, r9, r6
; CHECK-FP16-NEXT: sbcs r0, r10, #0
; CHECK-FP16-NEXT: vdup.32 d16, r5
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: vbsl q8, q6, q9
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: vmov.32 d11[1], r10
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mvn r6, #0
; CHECK-FP16-NEXT: vdup.32 d21, r0
; CHECK-FP16-NEXT: mvnne r3, #0
; CHECK-FP16-NEXT: vmov.32 d10[1], r8
; CHECK-FP16-NEXT: vmov r0, r1, d16
; CHECK-FP16-NEXT: vdup.32 d20, r3
; CHECK-FP16-NEXT: vbit q9, q5, q10
; CHECK-FP16-NEXT: adr r5, .LCPI6_1
; CHECK-FP16-NEXT: vld1.64 {d20, d21}, [r5:128]
; CHECK-FP16-NEXT: vmov r5, r4, d17
; CHECK-FP16-NEXT: vmov r3, r7, d18
; CHECK-FP16-NEXT: rsbs r0, r0, #-2147483648
; CHECK-FP16-NEXT: sbcs r0, r6, r1
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: rsbs r1, r3, #-2147483648
; CHECK-FP16-NEXT: vmov r1, r3, d19
; CHECK-FP16-NEXT: sbcs r7, r6, r7
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: rsbs r5, r5, #-2147483648
; CHECK-FP16-NEXT: sbcs r5, r6, r4
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlt r5, #1
; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648
; CHECK-FP16-NEXT: sbcs r1, r6, r3
; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: mvnne r2, #0
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: mvnne r5, #0
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: vdup.32 d25, r5
; CHECK-FP16-NEXT: mvnne r7, #0
; CHECK-FP16-NEXT: vdup.32 d23, r2
; CHECK-FP16-NEXT: vdup.32 d24, r0
; CHECK-FP16-NEXT: vbif q8, q10, q12
; CHECK-FP16-NEXT: vdup.32 d22, r7
; CHECK-FP16-NEXT: vbif q9, q10, q11
; CHECK-FP16-NEXT: vmovn.i64 d1, q8
; CHECK-FP16-NEXT: vmovn.i64 d0, q9
; CHECK-FP16-NEXT: vpop {d8}
; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-FP16-NEXT: .p2align 4
; CHECK-FP16-NEXT: @ %bb.1:
; CHECK-FP16-NEXT: .LCPI6_0:
; CHECK-FP16-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-FP16-NEXT: .long 0 @ 0x0
; CHECK-FP16-NEXT: .long 2147483647 @ 0x7fffffff
; CHECK-FP16-NEXT: .long 0 @ 0x0
; CHECK-FP16-NEXT: .LCPI6_1:
; CHECK-FP16-NEXT: .long 2147483648 @ 0x80000000
; CHECK-FP16-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-FP16-NEXT: .long 2147483648 @ 0x80000000
; CHECK-FP16-NEXT: .long 4294967295 @ 0xffffffff
entry:
; Widen first: convert each half lane to a signed 64-bit integer.
%conv = fptosi <4 x half> %x to <4 x i64>
; Upper clamp: keep the lane if it is signed-less-than 2147483647, else use 2147483647.
%0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
%spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
; Lower clamp: keep the lane if it is signed-greater-than -2147483648, else use -2147483648.
%1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
%spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; utesth_f16i32: unsigned saturating conversion of <4 x half> to <4 x i32>,
; written as fptoui to i64, an unsigned compare/select clamp to at most
; 4294967295, and a truncate. The two RUN configurations at the top of the
; file produce different code here, so each has its own block of
; expectations: the armv7a +neon run extends each lane with __aeabi_h2f and
; then calls __aeabi_f2ulz, while the armv8a +fullfp16 run calls
; __fixunshfdi directly on each half lane.
define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: .vsave {d12, d13}
; CHECK-NEON-NEXT: vpush {d12, d13}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
; CHECK-NEON-NEXT: vmov r0, s3
; CHECK-NEON-NEXT: vmov.f32 s16, s2
; CHECK-NEON-NEXT: vmov.f32 s18, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: mov r10, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: mov r8, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: mov r9, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov.32 d9[0], r10
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: mvn r4, #0
; CHECK-NEON-NEXT: subs r2, r5, r4
; CHECK-NEON-NEXT: sbcs r2, r7, #0
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: mov r3, #0
; CHECK-NEON-NEXT: movwlo r2, #1
; CHECK-NEON-NEXT: subs r0, r0, r4
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlo r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: subs r1, r10, r4
; CHECK-NEON-NEXT: sbcs r1, r8, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlo r1, #1
; CHECK-NEON-NEXT: subs r7, r6, r4
; CHECK-NEON-NEXT: sbcs r7, r9, #0
; CHECK-NEON-NEXT: movwlo r3, #1
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mvnne r3, #0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvnne r1, #0
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: vdup.32 d19, r1
; CHECK-NEON-NEXT: mvnne r2, #0
; CHECK-NEON-NEXT: vdup.32 d17, r3
; CHECK-NEON-NEXT: vdup.32 d18, r0
; CHECK-NEON-NEXT: vand q10, q4, q9
; CHECK-NEON-NEXT: vdup.32 d16, r2
; CHECK-NEON-NEXT: vand q11, q6, q8
; CHECK-NEON-NEXT: vorn q9, q10, q9
; CHECK-NEON-NEXT: vorn q8, q11, q8
; CHECK-NEON-NEXT: vmovn.i64 d1, q9
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: vpop {d12, d13}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: utesth_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov.u16 r5, d0[3]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r10, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
; CHECK-FP16-NEXT: mov r8, r1
; CHECK-FP16-NEXT: vmov.32 d11[0], r10
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: vmov s0, r5
; CHECK-FP16-NEXT: mov r6, r0
; CHECK-FP16-NEXT: mov r7, r1
; CHECK-FP16-NEXT: vmov.32 d10[0], r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
; CHECK-FP16-NEXT: mov r9, r1
; CHECK-FP16-NEXT: vmov.32 d9[0], r5
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mvn r4, #0
; CHECK-FP16-NEXT: subs r2, r6, r4
; CHECK-FP16-NEXT: sbcs r2, r7, #0
; CHECK-FP16-NEXT: vmov.32 d8[0], r0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: mov r3, #0
; CHECK-FP16-NEXT: movwlo r2, #1
; CHECK-FP16-NEXT: subs r0, r0, r4
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlo r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: subs r1, r5, r4
; CHECK-FP16-NEXT: sbcs r1, r9, #0
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlo r1, #1
; CHECK-FP16-NEXT: subs r7, r10, r4
; CHECK-FP16-NEXT: sbcs r7, r8, #0
; CHECK-FP16-NEXT: movwlo r3, #1
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mvnne r3, #0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvnne r1, #0
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: vdup.32 d19, r1
; CHECK-FP16-NEXT: mvnne r2, #0
; CHECK-FP16-NEXT: vdup.32 d17, r3
; CHECK-FP16-NEXT: vdup.32 d18, r0
; CHECK-FP16-NEXT: vand q10, q4, q9
; CHECK-FP16-NEXT: vdup.32 d16, r2
; CHECK-FP16-NEXT: vand q11, q5, q8
; CHECK-FP16-NEXT: vorn q9, q10, q9
; CHECK-FP16-NEXT: vorn q8, q11, q8
; CHECK-FP16-NEXT: vmovn.i64 d1, q9
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
; Widen first: convert each half lane to an unsigned 64-bit integer.
%conv = fptoui <4 x half> %x to <4 x i64>
; Upper clamp only (unsigned): keep the lane if it is below 4294967295, else use 4294967295.
%0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; The clamped value now fits in 32 bits, so the truncate keeps the low half.
%conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
ret <4 x i32> %conv6
}
; ustest_f16i32 converts <4 x half> to <4 x i64> with a signed fptosi, clamps
; every lane to the range [0, 4294967295] with signed compare+select pairs, and
; truncates the result to <4 x i32>.
; In the expected output below, the NEON-only run calls __aeabi_h2f followed by
; __aeabi_f2lz once per lane, while the fullfp16 run calls __fixhfdi once per
; lane.
define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s3
; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: vmov r5, s18
; CHECK-NEON-NEXT: vmov r8, s16
; CHECK-NEON-NEXT: vmov.32 d9[0], r6
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: subs r0, r0, r9
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: vmov.32 d9[1], r7
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: vmov.32 d8[1], r1
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: subs r1, r6, r9
; CHECK-NEON-NEXT: sbcs r1, r7, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlt r1, #1
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvnne r1, #0
; CHECK-NEON-NEXT: vdup.32 d13, r1
; CHECK-NEON-NEXT: vdup.32 d12, r0
; CHECK-NEON-NEXT: mov r0, r5
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov.i64 q5, #0xffffffff
; CHECK-NEON-NEXT: vbif q4, q5, q6
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r8
; CHECK-NEON-NEXT: mov r6, r1
; CHECK-NEON-NEXT: vmov r7, r10, d8
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: subs r2, r5, r9
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: sbcs r2, r6, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: subs r0, r0, r9
; CHECK-NEON-NEXT: sbcs r0, r1, #0
; CHECK-NEON-NEXT: vmov.32 d13[1], r1
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: vmov.32 d12[1], r6
; CHECK-NEON-NEXT: mvnne r2, #0
; CHECK-NEON-NEXT: vdup.32 d17, r0
; CHECK-NEON-NEXT: rsbs r0, r7, #0
; CHECK-NEON-NEXT: vdup.32 d16, r2
; CHECK-NEON-NEXT: vmov r7, r5, d9
; CHECK-NEON-NEXT: vbsl q8, q6, q5
; CHECK-NEON-NEXT: rscs r0, r10, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: vmov r1, r2, d16
; CHECK-NEON-NEXT: vmov r3, r6, d17
; CHECK-NEON-NEXT: rsbs r1, r1, #0
; CHECK-NEON-NEXT: rscs r1, r2, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: movwlt r1, #1
; CHECK-NEON-NEXT: rsbs r2, r3, #0
; CHECK-NEON-NEXT: rscs r2, r6, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwlt r2, #1
; CHECK-NEON-NEXT: rsbs r3, r7, #0
; CHECK-NEON-NEXT: rscs r3, r5, #0
; CHECK-NEON-NEXT: movwlt r4, #1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: mvnne r4, #0
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: mvnne r2, #0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvnne r1, #0
; CHECK-NEON-NEXT: vmov.32 d21[0], r2
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: vmov.32 d20[0], r1
; CHECK-NEON-NEXT: mvnne r0, #0
; CHECK-NEON-NEXT: vmov.32 d19[0], r4
; CHECK-NEON-NEXT: vand q8, q8, q10
; CHECK-NEON-NEXT: vmov.32 d18[0], r0
; CHECK-NEON-NEXT: vmovn.i64 d1, q8
; CHECK-NEON-NEXT: vand q9, q4, q9
; CHECK-NEON-NEXT: vmovn.i64 d0, q9
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
;
; CHECK-FP16-LABEL: ustest_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13, d14, d15}
; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13, d14, d15}
; CHECK-FP16-NEXT: .vsave {d8}
; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov.u16 r8, d0[2]
; CHECK-FP16-NEXT: vmov.u16 r9, d0[3]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
; CHECK-FP16-NEXT: mov r5, r1
; CHECK-FP16-NEXT: vmov.32 d11[0], r4
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.32 d10[0], r0
; CHECK-FP16-NEXT: mvn r7, #0
; CHECK-FP16-NEXT: subs r0, r0, r7
; CHECK-FP16-NEXT: vmov.i64 q6, #0xffffffff
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: vmov.32 d11[1], r5
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: vmov s0, r8
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: vmov.32 d10[1], r1
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: subs r1, r4, r7
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: sbcs r1, r5, #0
; CHECK-FP16-NEXT: vmov s16, r9
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlt r1, #1
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvnne r1, #0
; CHECK-FP16-NEXT: vdup.32 d17, r1
; CHECK-FP16-NEXT: vdup.32 d16, r0
; CHECK-FP16-NEXT: vbif q5, q6, q8
; CHECK-FP16-NEXT: vmov r9, r8, d10
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: mov r5, r1
; CHECK-FP16-NEXT: vmov.32 d14[0], r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: subs r2, r4, r7
; CHECK-FP16-NEXT: vmov.32 d15[0], r0
; CHECK-FP16-NEXT: sbcs r2, r5, #0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: subs r0, r0, r7
; CHECK-FP16-NEXT: sbcs r0, r1, #0
; CHECK-FP16-NEXT: vmov.32 d15[1], r1
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: vmov.32 d14[1], r5
; CHECK-FP16-NEXT: mvnne r2, #0
; CHECK-FP16-NEXT: vmov r5, r4, d11
; CHECK-FP16-NEXT: vdup.32 d17, r0
; CHECK-FP16-NEXT: rsbs r0, r9, #0
; CHECK-FP16-NEXT: vdup.32 d16, r2
; CHECK-FP16-NEXT: rscs r0, r8, #0
; CHECK-FP16-NEXT: vbsl q8, q7, q6
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: vmov r1, r2, d16
; CHECK-FP16-NEXT: vmov r3, r7, d17
; CHECK-FP16-NEXT: rsbs r1, r1, #0
; CHECK-FP16-NEXT: rscs r1, r2, #0
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: movwlt r1, #1
; CHECK-FP16-NEXT: rsbs r2, r3, #0
; CHECK-FP16-NEXT: rscs r2, r7, #0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwlt r2, #1
; CHECK-FP16-NEXT: rsbs r3, r5, #0
; CHECK-FP16-NEXT: rscs r3, r4, #0
; CHECK-FP16-NEXT: movwlt r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: mvnne r6, #0
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: mvnne r2, #0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvnne r1, #0
; CHECK-FP16-NEXT: vmov.32 d21[0], r2
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: vmov.32 d20[0], r1
; CHECK-FP16-NEXT: mvnne r0, #0
; CHECK-FP16-NEXT: vmov.32 d19[0], r6
; CHECK-FP16-NEXT: vand q8, q8, q10
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmovn.i64 d1, q8
; CHECK-FP16-NEXT: vand q9, q5, q9
; CHECK-FP16-NEXT: vmovn.i64 d0, q9
; CHECK-FP16-NEXT: vpop {d8}
; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13, d14, d15}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
; Signed conversion into i64 lanes; the upper bound 4294967295 does not fit in
; a signed i32, so the clamp is performed at i64 width.
%conv = fptosi <4 x half> %x to <4 x i64>
; Upper clamp, written as signed compare + select (keeps %conv when it is
; below 4294967295, otherwise yields 4294967295).
%0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
; Lower clamp, written as signed compare + select (negative lanes become 0).
%1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
%spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
; Narrow the clamped lanes to the i32 result type.
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; i16 saturate
; stest_f64i16 converts <2 x double> to <2 x i32> with a signed fptosi, clamps
; every lane to the range [-32768, 32767], and truncates to <2 x i16>.
; Both run configurations share the expected output below, which converts each
; lane with vcvt.s32.f64 and clamps with vmin.s32 / vmax.s32.
define <2 x i16> @stest_f64i16(<2 x double> %x) {
; CHECK-LABEL: stest_f64i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.s32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0x7fff
; CHECK-NEXT: vmvn.i32 d18, #0x7fff
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.s32 d16, d16, d17
; CHECK-NEXT: vmax.s32 d0, d16, d18
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
; Upper clamp at 32767 (signed compare + select).
%0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
%spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>
; Lower clamp at -32768 (signed compare + select).
%1 = icmp sgt <2 x i32> %spec.store.select, <i32 -32768, i32 -32768>
%spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>
; Narrow the clamped lanes to i16.
%conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
ret <2 x i16> %conv6
}
; utest_f64i16 converts <2 x double> to <2 x i32> with an unsigned fptoui,
; clamps every lane to at most 65535, and truncates to <2 x i16>.
; Both run configurations share the expected output below, which converts each
; lane with vcvt.u32.f64 and clamps with a single vmin.u32.
define <2 x i16> @utest_f64i16(<2 x double> %x) {
; CHECK-LABEL: utest_f64i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.u32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0xffff
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.u32 d0, d16, d17
; CHECK-NEXT: bx lr
entry:
%conv = fptoui <2 x double> %x to <2 x i32>
; Upper clamp at 65535 (unsigned compare + select); no lower clamp is needed
; for an unsigned value.
%0 = icmp ult <2 x i32> %conv, <i32 65535, i32 65535>
%spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
; Narrow the clamped lanes to i16.
%conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
ret <2 x i16> %conv6
}
; ustest_f64i16 converts <2 x double> to <2 x i32> with a signed fptosi, clamps
; every lane to the unsigned 16-bit range [0, 65535] using signed compares, and
; truncates to <2 x i16>.
; Both run configurations share the expected output below, which converts each
; lane with vcvt.s32.f64 and clamps with vmin.s32 / vmax.s32.
define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.s32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0xffff
; CHECK-NEXT: vmov.i32 d18, #0x0
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.s32 d16, d16, d17
; CHECK-NEXT: vmax.s32 d0, d16, d18
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
; Upper clamp at 65535 (signed compare + select).
%0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
%spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
; Lower clamp at 0 (signed compare + select).
%1 = icmp sgt <2 x i32> %spec.store.select, zeroinitializer
%spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> zeroinitializer
; Narrow the clamped lanes to i16.
%conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
ret <2 x i16> %conv6
}
; stest_f32i16 converts <4 x float> to <4 x i32> with a signed fptosi, clamps
; every lane to the range [-32768, 32767], and truncates to <4 x i16>.
; Both run configurations share the expected output below: one vector
; vcvt.s32.f32, a vmin.s32 / vmax.s32 pair, and a narrowing vmovn.i32.
define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-LABEL: stest_f32i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0x7fff
; CHECK-NEXT: vmvn.i32 q10, #0x7fff
; CHECK-NEXT: vmin.s32 q8, q8, q9
; CHECK-NEXT: vmax.s32 q8, q8, q10
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
; Upper clamp at 32767 (signed compare + select).
%0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
%spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
; Lower clamp at -32768 (signed compare + select).
%1 = icmp sgt <4 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
; Narrow the clamped lanes to i16.
%conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
ret <4 x i16> %conv6
}
; utest_f32i16 converts <4 x float> to <4 x i32> with an unsigned fptoui,
; clamps every lane to at most 65535, and truncates to <4 x i16>.
; Both run configurations share the expected output below: one vector
; vcvt.u32.f32, a vmin.u32, and a narrowing vmovn.i32.
define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-LABEL: utest_f32i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vmin.u32 q8, q8, q9
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
%conv = fptoui <4 x float> %x to <4 x i32>
; Upper clamp at 65535 (unsigned compare + select); no lower clamp is needed
; for an unsigned value.
%0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
%spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
; Narrow the clamped lanes to i16.
%conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
ret <4 x i16> %conv6
}
; ustest_f32i16 converts <4 x float> to <4 x i32> with a signed fptosi, clamps
; every lane to the unsigned 16-bit range [0, 65535] using signed compares, and
; truncates to <4 x i16>.
; Both run configurations share the expected output below: one vector
; vcvt.s32.f32, a vmin.s32 / vmax.s32 pair, and a narrowing vmovn.i32.
define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vmov.i32 q10, #0x0
; CHECK-NEXT: vmin.s32 q8, q8, q9
; CHECK-NEXT: vmax.s32 q8, q8, q10
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
; Upper clamp at 65535 (signed compare + select).
%0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
%spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
; Lower clamp at 0 (signed compare + select).
%1 = icmp sgt <4 x i32> %spec.store.select, zeroinitializer
%spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> zeroinitializer
; Narrow the clamped lanes to i16.
%conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
ret <4 x i16> %conv6
}
; stest_f16i16 converts <8 x half> to <8 x i32> with a signed fptosi, clamps
; every lane to the range [-32768, 32767], and truncates to <8 x i16>.
; In the expected output below, the NEON-only run calls __aeabi_h2f once per
; lane and then uses vcvt.s32.f32, while the fullfp16 run converts directly
; with vcvt.s32.f16 and makes no calls. Both clamp with vmin.s32 / vmax.s32.
define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i16:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s7
; CHECK-NEON-NEXT: vmov.f32 s18, s6
; CHECK-NEON-NEXT: vmov.f32 s20, s5
; CHECK-NEON-NEXT: vmov.f32 s22, s4
; CHECK-NEON-NEXT: vmov.f32 s24, s3
; CHECK-NEON-NEXT: vmov.f32 s26, s2
; CHECK-NEON-NEXT: vmov.f32 s28, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s26
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s22
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s24
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov s22, r7
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s30, r6
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[1], r0
; CHECK-NEON-NEXT: vmov r0, s28
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r1, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r5
; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r4
; CHECK-NEON-NEXT: vmov.i32 q8, #0x7fff
; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
; CHECK-NEON-NEXT: vmvn.i32 q9, #0x7fff
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
; CHECK-NEON-NEXT: vmov.32 d12[1], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
; CHECK-NEON-NEXT: vmov.32 d9[1], r0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: vmovn.i32 d1, q10
; CHECK-NEON-NEXT: vmov.32 d8[1], r0
; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
; CHECK-NEON-NEXT: vmovn.i32 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i16:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
; CHECK-FP16-NEXT: vmovx.f16 s10, s3
; CHECK-FP16-NEXT: vmovx.f16 s8, s2
; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
; CHECK-FP16-NEXT: vmovx.f16 s6, s1
; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
; CHECK-FP16-NEXT: vmov.i32 q10, #0x7fff
; CHECK-FP16-NEXT: vmvn.i32 q11, #0x7fff
; CHECK-FP16-NEXT: vmov.32 d17[0], r0
; CHECK-FP16-NEXT: vmov r0, s5
; CHECK-FP16-NEXT: vmov.32 d16[0], r0
; CHECK-FP16-NEXT: vmov r0, s14
; CHECK-FP16-NEXT: vmov.32 d19[0], r0
; CHECK-FP16-NEXT: vmov r0, s12
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmov r0, s10
; CHECK-FP16-NEXT: vmov.32 d17[1], r0
; CHECK-FP16-NEXT: vmov r0, s8
; CHECK-FP16-NEXT: vmov.32 d16[1], r0
; CHECK-FP16-NEXT: vmov r0, s6
; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
; CHECK-FP16-NEXT: vmovn.i32 d1, q8
; CHECK-FP16-NEXT: vmov.32 d19[1], r0
; CHECK-FP16-NEXT: vmov r0, s4
; CHECK-FP16-NEXT: vmov.32 d18[1], r0
; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
; CHECK-FP16-NEXT: vmovn.i32 d0, q9
; CHECK-FP16-NEXT: bx lr
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
; Upper clamp at 32767 (signed compare + select).
%0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
%spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
; Lower clamp at -32768 (signed compare + select).
%1 = icmp sgt <8 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
%spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
; Narrow the clamped lanes to i16.
%conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
ret <8 x i16> %conv6
}
; utesth_f16i16 converts <8 x half> to <8 x i32> with an unsigned fptoui,
; clamps every lane to at most 65535, and truncates to <8 x i16>.
; In the expected output below, the NEON-only run calls __aeabi_h2f once per
; lane and then uses vcvt.u32.f32, while the fullfp16 run converts directly
; with vcvt.u32.f16 and makes no calls. Both clamp with vmin.u32.
define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i16:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s7
; CHECK-NEON-NEXT: vmov.f32 s18, s6
; CHECK-NEON-NEXT: vmov.f32 s20, s5
; CHECK-NEON-NEXT: vmov.f32 s22, s4
; CHECK-NEON-NEXT: vmov.f32 s24, s3
; CHECK-NEON-NEXT: vmov.f32 s26, s2
; CHECK-NEON-NEXT: vmov.f32 s28, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s26
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s22
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s24
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov s16, r7
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s18, r6
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[1], r0
; CHECK-NEON-NEXT: vmov r0, s28
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r1, s20
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r5
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s18
; CHECK-NEON-NEXT: vcvt.u32.f32 s18, s2
; CHECK-NEON-NEXT: vmov.32 d10[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r4
; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEON-NEXT: vcvt.u32.f32 s2, s2
; CHECK-NEON-NEXT: vmov.32 d11[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s16
; CHECK-NEON-NEXT: vmov.32 d12[1], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmin.u32 q9, q6, q8
; CHECK-NEON-NEXT: vmov.32 d11[1], r0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: vmovn.i32 d1, q9
; CHECK-NEON-NEXT: vmov.32 d10[1], r0
; CHECK-NEON-NEXT: vmin.u32 q8, q5, q8
; CHECK-NEON-NEXT: vmovn.i32 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i16:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s0, s3
; CHECK-FP16-NEXT: vcvt.u32.f16 s5, s2
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s14, s1
; CHECK-FP16-NEXT: vmovx.f16 s10, s3
; CHECK-FP16-NEXT: vmovx.f16 s8, s2
; CHECK-FP16-NEXT: vcvt.u32.f16 s10, s10
; CHECK-FP16-NEXT: vcvt.u32.f16 s8, s8
; CHECK-FP16-NEXT: vmovx.f16 s6, s1
; CHECK-FP16-NEXT: vcvt.u32.f16 s4, s4
; CHECK-FP16-NEXT: vcvt.u32.f16 s6, s6
; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
; CHECK-FP16-NEXT: vmov.32 d17[0], r0
; CHECK-FP16-NEXT: vmov r0, s5
; CHECK-FP16-NEXT: vmov.32 d16[0], r0
; CHECK-FP16-NEXT: vmov r0, s14
; CHECK-FP16-NEXT: vmov.32 d19[0], r0
; CHECK-FP16-NEXT: vmov r0, s12
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmov r0, s10
; CHECK-FP16-NEXT: vmov.32 d17[1], r0
; CHECK-FP16-NEXT: vmov r0, s8
; CHECK-FP16-NEXT: vmov.32 d16[1], r0
; CHECK-FP16-NEXT: vmov r0, s6
; CHECK-FP16-NEXT: vmin.u32 q8, q8, q10
; CHECK-FP16-NEXT: vmovn.i32 d1, q8
; CHECK-FP16-NEXT: vmov.32 d19[1], r0
; CHECK-FP16-NEXT: vmov r0, s4
; CHECK-FP16-NEXT: vmov.32 d18[1], r0
; CHECK-FP16-NEXT: vmin.u32 q9, q9, q10
; CHECK-FP16-NEXT: vmovn.i32 d0, q9
; CHECK-FP16-NEXT: bx lr
entry:
%conv = fptoui <8 x half> %x to <8 x i32>
; Upper clamp at 65535 (unsigned compare + select); no lower clamp is needed
; for an unsigned value.
%0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Narrow the clamped lanes to i16.
%conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
ret <8 x i16> %conv6
}
; ustest_f16i16 converts <8 x half> to <8 x i32> with a signed fptosi, clamps
; every lane to the unsigned 16-bit range [0, 65535] using signed compares, and
; truncates to <8 x i16>.
; In the expected output below, the NEON-only run calls __aeabi_h2f once per
; lane and then uses vcvt.s32.f32, while the fullfp16 run converts directly
; with vcvt.s32.f16 and makes no calls. Both clamp with vmin.s32 / vmax.s32.
define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i16:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s7
; CHECK-NEON-NEXT: vmov.f32 s18, s6
; CHECK-NEON-NEXT: vmov.f32 s20, s5
; CHECK-NEON-NEXT: vmov.f32 s22, s4
; CHECK-NEON-NEXT: vmov.f32 s24, s3
; CHECK-NEON-NEXT: vmov.f32 s26, s2
; CHECK-NEON-NEXT: vmov.f32 s28, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s26
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s22
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s24
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov s22, r7
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s30, r6
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[1], r0
; CHECK-NEON-NEXT: vmov r0, s28
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r1, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r5
; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r4
; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
; CHECK-NEON-NEXT: vmov.i32 q9, #0x0
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
; CHECK-NEON-NEXT: vmov.32 d12[1], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
; CHECK-NEON-NEXT: vmov.32 d9[1], r0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: vmovn.i32 d1, q10
; CHECK-NEON-NEXT: vmov.32 d8[1], r0
; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
; CHECK-NEON-NEXT: vmovn.i32 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i16:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
; CHECK-FP16-NEXT: vmovx.f16 s10, s3
; CHECK-FP16-NEXT: vmovx.f16 s8, s2
; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
; CHECK-FP16-NEXT: vmovx.f16 s6, s1
; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
; CHECK-FP16-NEXT: vmov.i32 q11, #0x0
; CHECK-FP16-NEXT: vmov.32 d17[0], r0
; CHECK-FP16-NEXT: vmov r0, s5
; CHECK-FP16-NEXT: vmov.32 d16[0], r0
; CHECK-FP16-NEXT: vmov r0, s14
; CHECK-FP16-NEXT: vmov.32 d19[0], r0
; CHECK-FP16-NEXT: vmov r0, s12
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmov r0, s10
; CHECK-FP16-NEXT: vmov.32 d17[1], r0
; CHECK-FP16-NEXT: vmov r0, s8
; CHECK-FP16-NEXT: vmov.32 d16[1], r0
; CHECK-FP16-NEXT: vmov r0, s6
; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
; CHECK-FP16-NEXT: vmovn.i32 d1, q8
; CHECK-FP16-NEXT: vmov.32 d19[1], r0
; CHECK-FP16-NEXT: vmov r0, s4
; CHECK-FP16-NEXT: vmov.32 d18[1], r0
; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
; CHECK-FP16-NEXT: vmovn.i32 d0, q9
; CHECK-FP16-NEXT: bx lr
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
; Upper clamp at 65535 (signed compare + select).
%0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; Lower clamp at 0 (signed compare + select).
%1 = icmp sgt <8 x i32> %spec.store.select, zeroinitializer
%spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> zeroinitializer
; Narrow the clamped lanes to i16.
%conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
ret <8 x i16> %conv6
}
; i64 saturate
; stest_f64i64 converts <2 x double> to <2 x i128> with a signed fptosi, clamps
; every lane to the range [-9223372036854775808, 9223372036854775807], and
; truncates to <2 x i64>.
; Both run configurations share the expected output below, which calls
; __fixdfti once per lane and performs the 128-bit compares with
; subs/sbcs and rsbs/rscs carry chains.
define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-LABEL: stest_f64i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mvn r8, #0
; CHECK-NEXT: subs r0, r0, r8
; CHECK-NEXT: mvn r6, #-2147483648
; CHECK-NEXT: sbcs r0, r1, r6
; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: sbcs r0, r2, #0
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r9, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r3, r0
; CHECK-NEXT: movne r0, r2
; CHECK-NEXT: moveq r10, r6
; CHECK-NEXT: moveq r5, r8
; CHECK-NEXT: rsbs r1, r5, #0
; CHECK-NEXT: rscs r1, r10, #-2147483648
; CHECK-NEXT: sbcs r0, r8, r0
; CHECK-NEXT: sbcs r0, r8, r3
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r4, r0, r8
; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r4, r1, r6
; CHECK-NEXT: sbcs r4, r2, #0
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: moveq r3, r4
; CHECK-NEXT: movne r6, r1
; CHECK-NEXT: movne r4, r2
; CHECK-NEXT: moveq r0, r8
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r6, #-2147483648
; CHECK-NEXT: sbcs r1, r8, r4
; CHECK-NEXT: sbcs r1, r8, r3
; CHECK-NEXT: movwlt r9, #1
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: moveq r0, r9
; CHECK-NEXT: mov r1, #-2147483648
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: moveq r10, r1
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: vmov.32 d1[1], r10
; CHECK-NEXT: moveq r6, r1
; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
; The conversion is done at i128 width so both i64 bounds can be compared
; against without wrapping.
%conv = fptosi <2 x double> %x to <2 x i128>
; Upper clamp at 9223372036854775807 (signed compare + select).
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
; Lower clamp at -9223372036854775808 (signed compare + select).
%1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
%spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
; Narrow the clamped lanes to i64.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; utest_f64i64 converts <2 x double> to <2 x i128> with an unsigned fptoui,
; clamps every lane to at most 18446744073709551616, and truncates to
; <2 x i64>.
; Both run configurations share the expected output below, which calls
; __fixunsdfti once per lane.
; NOTE(review): the bound is 2^64, not 2^64-1, so a lane that reaches the bound
; truncates to 0 rather than to the largest i64 bit pattern. The expected
; output matches this (the low words are zeroed when the compare fails).
; Confirm this bound is the intended pattern before changing it.
define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-LABEL: utest_f64i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: subs r0, r2, #1
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: movwlo r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlo r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r0, r6
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r7, r4
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: movne r6, r1
; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
%conv = fptoui <2 x double> %x to <2 x i128>
; Upper clamp at 18446744073709551616 (unsigned compare + select).
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
; Narrow to i64, keeping only the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
ret <2 x i64> %conv6
}
; ustest_f64i64 converts <2 x double> to <2 x i128> with a signed fptosi,
; clamps every lane to the range [0, 18446744073709551616] using signed
; compares, and truncates to <2 x i64>.
; Both run configurations share the expected output below, which calls
; __fixdfti once per lane.
; NOTE(review): the upper bound is 2^64, not 2^64-1, so a lane that reaches the
; bound truncates to 0. Confirm this bound is the intended pattern before
; changing it.
define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: subs r1, r2, #1
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movwlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mov r9, #1
; CHECK-NEXT: moveq r3, r6
; CHECK-NEXT: moveq r8, r6
; CHECK-NEXT: moveq r2, r9
; CHECK-NEXT: movne r6, r0
; CHECK-NEXT: rsbs r0, r6, #0
; CHECK-NEXT: rscs r0, r8, #0
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: moveq r6, r7
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r4, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movne r9, r2
; CHECK-NEXT: moveq r3, r4
; CHECK-NEXT: moveq r1, r4
; CHECK-NEXT: movne r4, r0
; CHECK-NEXT: rsbs r0, r4, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: rscs r0, r9, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: moveq r4, r5
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r7, r8
; CHECK-NEXT: vmov.32 d0[0], r4
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: movne r5, r1
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <2 x double> %x to <2 x i128>
; Upper clamp at 18446744073709551616 (signed compare + select).
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
; Lower clamp at 0 (signed compare + select).
%1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
%spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
; Narrow to i64, keeping only the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; stest_f32i64: converts each float lane to a signed i128, clamps it to the
; signed 64-bit range [-2^63, 2^63-1] with two compare+select pairs, and
; truncates to i64.
; The expected assembly below performs one __fixsfti libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mvn r8, #0
; CHECK-NEXT: subs r0, r0, r8
; CHECK-NEXT: mvn r6, #-2147483648
; CHECK-NEXT: sbcs r0, r1, r6
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: sbcs r0, r2, #0
; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: mov r9, #0
; CHECK-NEXT: movwlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r3, r0
; CHECK-NEXT: movne r0, r2
; CHECK-NEXT: moveq r10, r6
; CHECK-NEXT: moveq r5, r8
; CHECK-NEXT: rsbs r1, r5, #0
; CHECK-NEXT: rscs r1, r10, #-2147483648
; CHECK-NEXT: sbcs r0, r8, r0
; CHECK-NEXT: sbcs r0, r8, r3
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r4, r0, r8
; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r4, r1, r6
; CHECK-NEXT: sbcs r4, r2, #0
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: moveq r3, r4
; CHECK-NEXT: movne r6, r1
; CHECK-NEXT: movne r4, r2
; CHECK-NEXT: moveq r0, r8
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: rscs r1, r6, #-2147483648
; CHECK-NEXT: sbcs r1, r8, r4
; CHECK-NEXT: sbcs r1, r8, r3
; CHECK-NEXT: movwlt r9, #1
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: moveq r0, r9
; CHECK-NEXT: mov r1, #-2147483648
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: moveq r10, r1
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: vmov.32 d1[1], r10
; CHECK-NEXT: moveq r6, r1
; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
; Signed conversion into a wide (128-bit) intermediate.
%conv = fptosi <2 x float> %x to <2 x i128>
; Upper clamp: keep lanes that are signed-less-than 2^63-1, otherwise use 2^63-1.
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
; Lower clamp: keep lanes that are signed-greater-than -2^63, otherwise use -2^63.
%1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
%spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
; Keep only the low 64 bits of each clamped lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; utest_f32i64: converts each float lane to an unsigned i128, clamps it from
; above with an unsigned compare+select, and truncates to i64.
; The expected assembly below performs one __fixunssfti libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
; NOTE(review): the upper bound is 2^64 (18446744073709551616), whose low 64
; bits are all zero, rather than 2^64-1 -- confirm this bound is intentional.
define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-LABEL: utest_f32i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: subs r0, r2, #1
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: movwlo r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: moveq r5, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r5
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: movwlo r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r0, r6
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r7, r4
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: movne r6, r1
; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Unsigned conversion into a wide (128-bit) intermediate.
%conv = fptoui <2 x float> %x to <2 x i128>
; Upper clamp: keep lanes that are unsigned-less-than 2^64, otherwise use 2^64.
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
; Keep only the low 64 bits of each clamped lane.
%conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
ret <2 x i64> %conv6
}
; ustest_f32i64: converts each float lane to a signed i128, clamps it to the
; range [0, 2^64] with two compare+select pairs, and truncates to i64.
; The expected assembly below performs one __fixsfti libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
; NOTE(review): the upper bound is 2^64 (18446744073709551616), whose low 64
; bits are all zero, rather than 2^64-1 -- confirm this bound is intentional.
define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: mov r8, r1
; CHECK-NEXT: subs r1, r2, #1
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: mov r9, #1
; CHECK-NEXT: movwlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r3, r6
; CHECK-NEXT: moveq r8, r6
; CHECK-NEXT: moveq r2, r9
; CHECK-NEXT: movne r6, r0
; CHECK-NEXT: rsbs r0, r6, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: rscs r0, r8, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: rscs r0, r2, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: moveq r6, r7
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r4, r2, #1
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movne r9, r2
; CHECK-NEXT: moveq r3, r4
; CHECK-NEXT: moveq r1, r4
; CHECK-NEXT: movne r4, r0
; CHECK-NEXT: rsbs r0, r4, #0
; CHECK-NEXT: rscs r0, r1, #0
; CHECK-NEXT: rscs r0, r9, #0
; CHECK-NEXT: rscs r0, r3, #0
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: moveq r4, r5
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r7, r8
; CHECK-NEXT: vmov.32 d0[0], r4
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: movne r5, r1
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
; Signed conversion into a wide (128-bit) intermediate.
%conv = fptosi <2 x float> %x to <2 x i128>
; Upper clamp: keep lanes that are signed-less-than 2^64, otherwise use 2^64.
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
; Lower clamp: keep lanes that are signed-greater-than 0, otherwise use 0.
%1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
%spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
; Keep only the low 64 bits of each clamped lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; stest_f16i64: converts each half lane to a signed i128, clamps it to the
; signed 64-bit range [-2^63, 2^63-1] with two compare+select pairs, and
; truncates to i64.
; Two expected-output bodies follow, one per RUN line at the top of this file:
;  - the armv7a (+neon) body widens each lane with __aeabi_h2f and then calls
;    __fixsfti;
;  - the armv8a (+neon,+fullfp16) body calls __fixhfti directly.
; The assertions are autogenerated; regenerate them with the script named in
; the NOTE at the top of this file instead of editing them by hand.
define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r8, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: subs r0, r0, r9
; CHECK-NEON-NEXT: mvn r7, #-2147483648
; CHECK-NEON-NEXT: sbcs r0, r1, r7
; CHECK-NEON-NEXT: mov r11, r1
; CHECK-NEON-NEXT: sbcs r0, r2, #0
; CHECK-NEON-NEXT: vmov s0, r8
; CHECK-NEON-NEXT: sbcs r0, r3, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r10, #0
; CHECK-NEON-NEXT: movwlt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: moveq r3, r0
; CHECK-NEON-NEXT: movne r0, r2
; CHECK-NEON-NEXT: moveq r11, r7
; CHECK-NEON-NEXT: moveq r5, r9
; CHECK-NEON-NEXT: rsbs r1, r5, #0
; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648
; CHECK-NEON-NEXT: sbcs r0, r9, r0
; CHECK-NEON-NEXT: sbcs r0, r9, r3
; CHECK-NEON-NEXT: movwlt r6, #1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: moveq r5, r6
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r4, r0, r9
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: sbcs r4, r1, r7
; CHECK-NEON-NEXT: sbcs r4, r2, #0
; CHECK-NEON-NEXT: sbcs r4, r3, #0
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: movwlt r4, #1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: moveq r3, r4
; CHECK-NEON-NEXT: movne r7, r1
; CHECK-NEON-NEXT: movne r4, r2
; CHECK-NEON-NEXT: moveq r0, r9
; CHECK-NEON-NEXT: rsbs r1, r0, #0
; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648
; CHECK-NEON-NEXT: sbcs r1, r9, r4
; CHECK-NEON-NEXT: sbcs r1, r9, r3
; CHECK-NEON-NEXT: movwlt r10, #1
; CHECK-NEON-NEXT: cmp r10, #0
; CHECK-NEON-NEXT: moveq r0, r10
; CHECK-NEON-NEXT: mov r1, #-2147483648
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
; CHECK-NEON-NEXT: moveq r11, r1
; CHECK-NEON-NEXT: cmp r10, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r11
; CHECK-NEON-NEXT: moveq r7, r1
; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: mvn r8, #0
; CHECK-FP16-NEXT: subs r0, r0, r8
; CHECK-FP16-NEXT: mvn r6, #-2147483648
; CHECK-FP16-NEXT: sbcs r0, r1, r6
; CHECK-FP16-NEXT: mov r10, r1
; CHECK-FP16-NEXT: sbcs r0, r2, #0
; CHECK-FP16-NEXT: vmov s0, r7
; CHECK-FP16-NEXT: sbcs r0, r3, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: mov r9, #0
; CHECK-FP16-NEXT: movwlt r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: moveq r3, r0
; CHECK-FP16-NEXT: movne r0, r2
; CHECK-FP16-NEXT: moveq r10, r6
; CHECK-FP16-NEXT: moveq r5, r8
; CHECK-FP16-NEXT: rsbs r1, r5, #0
; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648
; CHECK-FP16-NEXT: sbcs r0, r8, r0
; CHECK-FP16-NEXT: sbcs r0, r8, r3
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: moveq r5, r7
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: subs r4, r0, r8
; CHECK-FP16-NEXT: vmov.32 d1[0], r5
; CHECK-FP16-NEXT: sbcs r4, r1, r6
; CHECK-FP16-NEXT: sbcs r4, r2, #0
; CHECK-FP16-NEXT: sbcs r4, r3, #0
; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: movwlt r4, #1
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: moveq r3, r4
; CHECK-FP16-NEXT: movne r6, r1
; CHECK-FP16-NEXT: movne r4, r2
; CHECK-FP16-NEXT: moveq r0, r8
; CHECK-FP16-NEXT: rsbs r1, r0, #0
; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648
; CHECK-FP16-NEXT: sbcs r1, r8, r4
; CHECK-FP16-NEXT: sbcs r1, r8, r3
; CHECK-FP16-NEXT: movwlt r9, #1
; CHECK-FP16-NEXT: cmp r9, #0
; CHECK-FP16-NEXT: moveq r0, r9
; CHECK-FP16-NEXT: mov r1, #-2147483648
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: moveq r10, r1
; CHECK-FP16-NEXT: cmp r9, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r10
; CHECK-FP16-NEXT: moveq r6, r1
; CHECK-FP16-NEXT: vmov.32 d0[1], r6
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
; Signed conversion into a wide (128-bit) intermediate.
%conv = fptosi <2 x half> %x to <2 x i128>
; Upper clamp: keep lanes that are signed-less-than 2^63-1, otherwise use 2^63-1.
%0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
; Lower clamp: keep lanes that are signed-greater-than -2^63, otherwise use -2^63.
%1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
%spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
; Keep only the low 64 bits of each clamped lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; utesth_f16i64: converts each half lane to an unsigned i128, clamps it from
; above with an unsigned compare+select, and truncates to i64.
; Two expected-output bodies follow, one per RUN line at the top of this file:
;  - the armv7a (+neon) body widens each lane with __aeabi_h2f and then calls
;    __fixunssfti;
;  - the armv8a (+neon,+fullfp16) body calls __fixunshfti directly.
; The assertions are autogenerated; regenerate them with the script named in
; the NOTE at the top of this file instead of editing them by hand.
; NOTE(review): the upper bound is 2^64 (18446744073709551616), whose low 64
; bits are all zero, rather than 2^64-1 -- confirm this bound is intentional.
define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: subs r0, r2, #1
; CHECK-NEON-NEXT: vmov s0, r5
; CHECK-NEON-NEXT: sbcs r0, r3, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: movwlo r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: moveq r6, r5
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: subs r2, r2, #1
; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: movwlo r7, #1
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: moveq r0, r7
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: movne r5, r4
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r5
; CHECK-NEON-NEXT: movne r7, r1
; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: subs r0, r2, #1
; CHECK-FP16-NEXT: vmov s0, r7
; CHECK-FP16-NEXT: sbcs r0, r3, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: movwlo r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: moveq r5, r7
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: subs r2, r2, #1
; CHECK-FP16-NEXT: vmov.32 d1[0], r5
; CHECK-FP16-NEXT: sbcs r2, r3, #0
; CHECK-FP16-NEXT: movwlo r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: moveq r0, r6
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: movne r7, r4
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r7
; CHECK-FP16-NEXT: movne r6, r1
; CHECK-FP16-NEXT: vmov.32 d0[1], r6
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Unsigned conversion into a wide (128-bit) intermediate.
%conv = fptoui <2 x half> %x to <2 x i128>
; Upper clamp: keep lanes that are unsigned-less-than 2^64, otherwise use 2^64.
%0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
; Keep only the low 64 bits of each clamped lane.
%conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
ret <2 x i64> %conv6
}
; ustest_f16i64: converts each half lane to a signed i128, clamps it to the
; range [0, 2^64] with two compare+select pairs, and truncates to i64.
; Two expected-output bodies follow, one per RUN line at the top of this file:
;  - the armv7a (+neon) body widens each lane with __aeabi_h2f and then calls
;    __fixsfti;
;  - the armv8a (+neon,+fullfp16) body calls __fixhfti directly.
; The assertions are autogenerated; regenerate them with the script named in
; the NOTE at the top of this file instead of editing them by hand.
; NOTE(review): the upper bound is 2^64 (18446744073709551616), whose low 64
; bits are all zero, rather than 2^64-1 -- confirm this bound is intentional.
define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: mov r8, r1
; CHECK-NEON-NEXT: subs r1, r2, #1
; CHECK-NEON-NEXT: vmov s0, r5
; CHECK-NEON-NEXT: sbcs r1, r3, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: mov r9, #1
; CHECK-NEON-NEXT: movwlt r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: moveq r3, r5
; CHECK-NEON-NEXT: moveq r8, r5
; CHECK-NEON-NEXT: moveq r2, r9
; CHECK-NEON-NEXT: movne r5, r0
; CHECK-NEON-NEXT: rsbs r0, r5, #0
; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: rscs r0, r8, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: rscs r0, r2, #0
; CHECK-NEON-NEXT: rscs r0, r3, #0
; CHECK-NEON-NEXT: movwlt r7, #1
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: moveq r5, r7
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r4, r2, #1
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: sbcs r4, r3, #0
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: movwlt r4, #1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: movne r9, r2
; CHECK-NEON-NEXT: moveq r3, r4
; CHECK-NEON-NEXT: moveq r1, r4
; CHECK-NEON-NEXT: movne r4, r0
; CHECK-NEON-NEXT: rsbs r0, r4, #0
; CHECK-NEON-NEXT: rscs r0, r1, #0
; CHECK-NEON-NEXT: rscs r0, r9, #0
; CHECK-NEON-NEXT: rscs r0, r3, #0
; CHECK-NEON-NEXT: movwlt r6, #1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: moveq r4, r6
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: movne r7, r8
; CHECK-NEON-NEXT: vmov.32 d0[0], r4
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r7
; CHECK-NEON-NEXT: movne r6, r1
; CHECK-NEON-NEXT: vmov.32 d0[1], r6
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vmov.u16 r4, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: mov r8, r1
; CHECK-FP16-NEXT: subs r1, r2, #1
; CHECK-FP16-NEXT: sbcs r1, r3, #0
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: movwlt r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: mov r9, #1
; CHECK-FP16-NEXT: moveq r3, r6
; CHECK-FP16-NEXT: moveq r8, r6
; CHECK-FP16-NEXT: moveq r2, r9
; CHECK-FP16-NEXT: movne r6, r0
; CHECK-FP16-NEXT: rsbs r0, r6, #0
; CHECK-FP16-NEXT: rscs r0, r8, #0
; CHECK-FP16-NEXT: vmov s0, r4
; CHECK-FP16-NEXT: rscs r0, r2, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: rscs r0, r3, #0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: moveq r6, r7
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: subs r4, r2, #1
; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: sbcs r4, r3, #0
; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: movwlt r4, #1
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: movne r9, r2
; CHECK-FP16-NEXT: moveq r3, r4
; CHECK-FP16-NEXT: moveq r1, r4
; CHECK-FP16-NEXT: movne r4, r0
; CHECK-FP16-NEXT: rsbs r0, r4, #0
; CHECK-FP16-NEXT: rscs r0, r1, #0
; CHECK-FP16-NEXT: rscs r0, r9, #0
; CHECK-FP16-NEXT: rscs r0, r3, #0
; CHECK-FP16-NEXT: movwlt r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: moveq r4, r5
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: movne r7, r8
; CHECK-FP16-NEXT: vmov.32 d0[0], r4
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r7
; CHECK-FP16-NEXT: movne r5, r1
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
; Signed conversion into a wide (128-bit) intermediate.
%conv = fptosi <2 x half> %x to <2 x i128>
; Upper clamp: keep lanes that are signed-less-than 2^64, otherwise use 2^64.
%0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
%spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
; Lower clamp: keep lanes that are signed-greater-than 0, otherwise use 0.
%1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
%spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
; Keep only the low 64 bits of each clamped lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; i32 saturate, written with the llvm.smin/llvm.smax/llvm.umin intrinsics (the _mm variants)
; stest_f64i32_mm: converts each double lane to a signed i64, clamps it to the
; signed 32-bit range [-2^31, 2^31-1] with llvm.smin followed by llvm.smax,
; and truncates to i32.
; The expected assembly below performs one __aeabi_d2lz libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
; CHECK-LABEL: stest_f64i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: vmov r0, r2, d9
; CHECK-NEXT: cmn r4, #-2147483647
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: movlo r3, r4
; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movpl r4, r5
; CHECK-NEXT: movpl r1, r6
; CHECK-NEXT: moveq r4, r3
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: mov r3, #-2147483648
; CHECK-NEXT: mov r7, #-2147483648
; CHECK-NEXT: movgt r3, r4
; CHECK-NEXT: cmp r4, #-2147483648
; CHECK-NEXT: movls r4, r7
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: movne r4, r3
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: cmn r0, #-2147483647
; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: movlo r2, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movmi r5, r0
; CHECK-NEXT: movmi r6, r1
; CHECK-NEXT: moveq r5, r2
; CHECK-NEXT: cmn r6, #1
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: vmov.32 d0[0], r4
; CHECK-NEXT: movgt r0, r5
; CHECK-NEXT: cmp r5, #-2147483648
; CHECK-NEXT: movls r5, r7
; CHECK-NEXT: cmn r6, #1
; CHECK-NEXT: movne r5, r0
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Signed conversion into a wide (64-bit) intermediate.
%conv = fptosi <2 x double> %x to <2 x i64>
; Upper clamp: signed minimum with 2^31-1.
%spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>)
; Lower clamp: signed maximum with -2^31.
%spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>)
; Keep only the low 32 bits of each clamped lane.
%conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
ret <2 x i32> %conv6
}
; utest_f64i32_mm: converts each double lane to an unsigned i64, clamps it to
; at most 2^32-1 with llvm.umin, and truncates to i32.
; The expected assembly below performs one __aeabi_d2ulz libcall per lane and
; then uses a vqsub.u64 / vsub.i64 pair followed by vmovn.i64.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmov r2, r1, d8
; CHECK-NEXT: vmov.32 d9[0], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vmov.32 d9[1], r4
; CHECK-NEXT: vmov.32 d8[1], r1
; CHECK-NEXT: vqsub.u64 q8, q4, q8
; CHECK-NEXT: vsub.i64 q8, q4, q8
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, pc}
entry:
; Unsigned conversion into a wide (64-bit) intermediate.
%conv = fptoui <2 x double> %x to <2 x i64>
; Upper clamp: unsigned minimum with 2^32-1.
%spec.store.select = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
; Keep only the low 32 bits of each clamped lane.
%conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
ret <2 x i32> %conv6
}
; ustest_f64i32_mm: converts each double lane to a signed i64, clamps it to
; the unsigned 32-bit range [0, 2^32-1] with llvm.smin followed by llvm.smax,
; and truncates to i32.
; The expected assembly below performs one __aeabi_d2lz libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov r2, r12, d9
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvn r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movmi r3, r0
; CHECK-NEXT: movpl r1, r5
; CHECK-NEXT: moveq r3, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: mvn r4, #0
; CHECK-NEXT: movwgt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: movne r6, r3
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: moveq r6, r3
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r12
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 d0[0], r6
; CHECK-NEXT: movmi r4, r0
; CHECK-NEXT: movpl r1, r5
; CHECK-NEXT: moveq r4, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movwgt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: movne r5, r4
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: moveq r5, r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
; Signed conversion into a wide (64-bit) intermediate.
%conv = fptosi <2 x double> %x to <2 x i64>
; Upper clamp: signed minimum with 2^32-1.
%spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
; Lower clamp: signed maximum with 0.
%spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> zeroinitializer)
; Keep only the low 32 bits of each clamped lane.
%conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
ret <2 x i32> %conv6
}
; stest_f32i32_mm: converts each of four float lanes to a signed i64, clamps
; it to the signed 32-bit range [-2^31, 2^31-1] with llvm.smin followed by
; llvm.smax, and truncates to i32.
; The expected assembly below performs one __aeabi_f2lz libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: stest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: mov r8, #-2147483648
; CHECK-NEXT: mvn r7, #-2147483648
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vmov r5, s16
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: cmn r0, #-2147483647
; CHECK-NEXT: mvn r0, #-2147483648
; CHECK-NEXT: mov r9, #0
; CHECK-NEXT: movlo r0, r4
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movpl r4, r7
; CHECK-NEXT: movpl r1, r9
; CHECK-NEXT: moveq r4, r0
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: movgt r0, r4
; CHECK-NEXT: cmp r4, #-2147483648
; CHECK-NEXT: movls r4, r8
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: movne r4, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: cmn r0, #-2147483647
; CHECK-NEXT: mvn r0, #-2147483648
; CHECK-NEXT: mov r2, #-2147483648
; CHECK-NEXT: movlo r0, r5
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movpl r5, r7
; CHECK-NEXT: movpl r1, r9
; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: movgt r2, r5
; CHECK-NEXT: cmp r5, #-2147483648
; CHECK-NEXT: movls r5, r8
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: movne r5, r2
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: cmn r0, #-2147483647
; CHECK-NEXT: mvn r0, #-2147483648
; CHECK-NEXT: mov r2, #-2147483648
; CHECK-NEXT: movlo r0, r6
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movpl r6, r7
; CHECK-NEXT: movpl r1, r9
; CHECK-NEXT: moveq r6, r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: movgt r2, r6
; CHECK-NEXT: cmp r6, #-2147483648
; CHECK-NEXT: movls r6, r8
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: movne r6, r2
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmn r0, #-2147483647
; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: movlo r2, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movmi r7, r0
; CHECK-NEXT: movmi r9, r1
; CHECK-NEXT: moveq r7, r2
; CHECK-NEXT: cmn r9, #1
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: movgt r0, r7
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: movls r7, r8
; CHECK-NEXT: cmn r9, #1
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: movne r7, r0
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
; Signed conversion into a wide (64-bit) intermediate.
%conv = fptosi <4 x float> %x to <4 x i64>
; Upper clamp: signed minimum with 2^31-1.
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
; Lower clamp: signed maximum with -2^31.
%spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
; Keep only the low 32 bits of each clamped lane.
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; utest_f32i32_mm: converts each of four float lanes to an unsigned i64,
; clamps it to at most 2^32-1 with llvm.umin, and truncates to i32.
; The expected assembly below performs one __aeabi_f2ulz libcall per lane and
; then, for each pair of lanes, uses a vqsub.u64 / vsub.i64 pair followed by
; vmovn.i64.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmov r1, s18
; CHECK-NEXT: vmov r5, s19
; CHECK-NEXT: vmov r6, s16
; CHECK-NEXT: vmov.32 d9[0], r0
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vmov.32 d10[0], r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vmov.32 d11[0], r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vmov.32 d11[1], r5
; CHECK-NEXT: vmov.32 d9[1], r4
; CHECK-NEXT: vmov.32 d10[1], r7
; CHECK-NEXT: vmov.32 d8[1], r1
; CHECK-NEXT: vqsub.u64 q9, q5, q8
; CHECK-NEXT: vqsub.u64 q8, q4, q8
; CHECK-NEXT: vsub.i64 q9, q5, q9
; CHECK-NEXT: vsub.i64 q8, q4, q8
; CHECK-NEXT: vmovn.i64 d1, q9
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Unsigned conversion into a wide (64-bit) intermediate.
%conv = fptoui <4 x float> %x to <4 x i64>
; Upper clamp: unsigned minimum with 2^32-1.
%spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
; Keep only the low 32 bits of each clamped lane.
%conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
ret <4 x i32> %conv6
}
; ustest_f32i32_mm: converts each of four float lanes to a signed i64, clamps
; it to the unsigned 32-bit range [0, 2^32-1] with llvm.smin followed by
; llvm.smax, and truncates to i32.
; The expected assembly below performs one __aeabi_f2lz libcall per lane.
; The assertions are autogenerated (see the NOTE at the top of this file);
; regenerate them with the script instead of editing them by hand.
define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: mvn r9, #0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vmov r5, s16
; CHECK-NEXT: vmov r8, s18
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvn r2, #0
; CHECK-NEXT: movmi r2, r0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: moveq r2, r0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwgt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movne r4, r2
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: moveq r4, r2
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvn r2, #0
; CHECK-NEXT: movmi r2, r0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: moveq r2, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: movwgt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: movne r5, r2
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: moveq r5, r2
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvn r2, #0
; CHECK-NEXT: movmi r2, r0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: moveq r2, r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movwgt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: movne r6, r2
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: moveq r6, r2
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: movmi r9, r0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: moveq r9, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movwgt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: movne r7, r9
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 d1[1], r4
; CHECK-NEXT: moveq r7, r9
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
; Signed conversion into a wide (64-bit) intermediate.
%conv = fptosi <4 x float> %x to <4 x i64>
; Upper clamp: signed minimum with 2^32-1.
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
; Lower clamp: signed maximum with 0.
%spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
; Keep only the low 32 bits of each clamped lane.
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; Signed saturation of <4 x half> to <4 x i32>, expressed with min/max intrinsics:
; every lane is converted to i64 with fptosi, clamped to
; [-2147483648, 2147483647] by llvm.smin followed by llvm.smax, then truncated.
; Expected code: each lane goes through a 64-bit conversion libcall
; (__aeabi_h2f + __aeabi_f2lz on the armv7a+neon run, __fixhfdi on the
; fullfp16 run) and is clamped with conditional moves in core registers before
; the four results are inserted into d0/d1.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
; CHECK-NEON-NEXT: vmov r0, s3
; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: cmn r4, #-2147483647
; CHECK-NEON-NEXT: mvn r2, #-2147483648
; CHECK-NEON-NEXT: movlo r2, r4
; CHECK-NEON-NEXT: mvn r7, #-2147483648
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r9, #0
; CHECK-NEON-NEXT: movpl r4, r7
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: moveq r4, r2
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: mov r2, #-2147483648
; CHECK-NEON-NEXT: mov r8, #-2147483648
; CHECK-NEON-NEXT: movgt r2, r4
; CHECK-NEON-NEXT: cmp r4, #-2147483648
; CHECK-NEON-NEXT: movls r4, r8
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movne r4, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: cmn r0, #-2147483647
; CHECK-NEON-NEXT: mvn r0, #-2147483648
; CHECK-NEON-NEXT: mov r2, #-2147483648
; CHECK-NEON-NEXT: movlo r0, r5
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r5, r7
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: moveq r5, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movgt r2, r5
; CHECK-NEON-NEXT: cmp r5, #-2147483648
; CHECK-NEON-NEXT: movls r5, r8
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movne r5, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: cmn r0, #-2147483647
; CHECK-NEON-NEXT: mvn r0, #-2147483648
; CHECK-NEON-NEXT: mov r2, #-2147483648
; CHECK-NEON-NEXT: movlo r0, r6
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r6, r7
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: moveq r6, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movgt r2, r6
; CHECK-NEON-NEXT: cmp r6, #-2147483648
; CHECK-NEON-NEXT: movls r6, r8
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movne r6, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmn r0, #-2147483647
; CHECK-NEON-NEXT: mvn r2, #-2147483648
; CHECK-NEON-NEXT: movlo r2, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movmi r7, r0
; CHECK-NEON-NEXT: movmi r9, r1
; CHECK-NEON-NEXT: moveq r7, r2
; CHECK-NEON-NEXT: cmn r9, #1
; CHECK-NEON-NEXT: mov r0, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: movgt r0, r7
; CHECK-NEON-NEXT: cmp r7, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d0[0], r5
; CHECK-NEON-NEXT: movls r7, r8
; CHECK-NEON-NEXT: cmn r9, #1
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
; CHECK-NEON-NEXT: movne r7, r0
; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9}
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
; CHECK-FP16-NEXT: vmov.u16 r2, d8[0]
; CHECK-FP16-NEXT: cmn r4, #-2147483647
; CHECK-FP16-NEXT: mvn r7, #-2147483648
; CHECK-FP16-NEXT: mov r9, #0
; CHECK-FP16-NEXT: mov r8, #-2147483648
; CHECK-FP16-NEXT: vmov s18, r0
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: movlo r0, r4
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r4, r7
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: moveq r4, r0
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: vmov s0, r2
; CHECK-FP16-NEXT: movgt r0, r4
; CHECK-FP16-NEXT: cmp r4, #-2147483648
; CHECK-FP16-NEXT: movls r4, r8
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: movne r4, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: cmn r0, #-2147483647
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: movlo r0, r5
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r5, r7
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: moveq r5, r0
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: movgt r0, r5
; CHECK-FP16-NEXT: cmp r5, #-2147483648
; CHECK-FP16-NEXT: movls r5, r8
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: movne r5, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.u16 r2, d8[1]
; CHECK-FP16-NEXT: mov r6, r0
; CHECK-FP16-NEXT: cmn r0, #-2147483647
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: movlo r0, r6
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r6, r7
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: moveq r6, r0
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: movgt r0, r6
; CHECK-FP16-NEXT: cmp r6, #-2147483648
; CHECK-FP16-NEXT: movls r6, r8
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: movne r6, r0
; CHECK-FP16-NEXT: vmov s0, r2
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: cmn r0, #-2147483647
; CHECK-FP16-NEXT: mvn r2, #-2147483648
; CHECK-FP16-NEXT: movlo r2, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movmi r7, r0
; CHECK-FP16-NEXT: movmi r9, r1
; CHECK-FP16-NEXT: moveq r7, r2
; CHECK-FP16-NEXT: cmn r9, #1
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: movgt r0, r7
; CHECK-FP16-NEXT: cmp r7, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d0[0], r5
; CHECK-FP16-NEXT: movls r7, r8
; CHECK-FP16-NEXT: cmn r9, #1
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
; CHECK-FP16-NEXT: movne r7, r0
; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: vpop {d8, d9}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
%spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; Unsigned saturation of <4 x half> to <4 x i32>, expressed with llvm.umin:
; every lane is converted to i64 with fptoui, clamped to at most 4294967295,
; then truncated.
; Expected code: each lane goes through a 64-bit unsigned conversion libcall
; (__aeabi_h2f + __aeabi_f2ulz on the armv7a+neon run, __fixunshfdi on the
; fullfp16 run). The 64-bit results are assembled into q registers and the
; clamp is done on vectors as x - (x saturating-minus 0xffffffff), i.e.
; vqsub.u64 followed by vsub.i64, before narrowing with vmovn.i64.
define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s3
; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: vmov r1, s18
; CHECK-NEON-NEXT: vmov r6, s16
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: vmov r7, s20
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: vmov.32 d10[0], r0
; CHECK-NEON-NEXT: mov r0, r6
; CHECK-NEON-NEXT: mov r5, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: vmov.32 d11[0], r0
; CHECK-NEON-NEXT: mov r0, r7
; CHECK-NEON-NEXT: mov r6, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEON-NEXT: vmov.32 d11[1], r6
; CHECK-NEON-NEXT: vmov.32 d9[1], r4
; CHECK-NEON-NEXT: vmov.32 d10[1], r5
; CHECK-NEON-NEXT: vmov.32 d8[1], r1
; CHECK-NEON-NEXT: vqsub.u64 q9, q5, q8
; CHECK-NEON-NEXT: vqsub.u64 q8, q4, q8
; CHECK-NEON-NEXT: vsub.i64 q9, q5, q9
; CHECK-NEON-NEXT: vsub.i64 q8, q4, q8
; CHECK-NEON-NEXT: vmovn.i64 d1, q9
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13}
; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13}
; CHECK-FP16-NEXT: .vsave {d8}
; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov.u16 r6, d0[3]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: vmov.u16 r1, d8[2]
; CHECK-FP16-NEXT: vmov.32 d11[0], r0
; CHECK-FP16-NEXT: vmov s0, r1
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r5, r1
; CHECK-FP16-NEXT: vmov.32 d12[0], r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r6, r1
; CHECK-FP16-NEXT: vmov.u16 r1, d8[0]
; CHECK-FP16-NEXT: vmov.32 d13[0], r0
; CHECK-FP16-NEXT: vmov s0, r1
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: vmov.32 d10[0], r0
; CHECK-FP16-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-FP16-NEXT: vmov.32 d13[1], r6
; CHECK-FP16-NEXT: vmov.32 d11[1], r4
; CHECK-FP16-NEXT: vmov.32 d12[1], r5
; CHECK-FP16-NEXT: vmov.32 d10[1], r1
; CHECK-FP16-NEXT: vqsub.u64 q9, q6, q8
; CHECK-FP16-NEXT: vqsub.u64 q8, q5, q8
; CHECK-FP16-NEXT: vsub.i64 q9, q6, q9
; CHECK-FP16-NEXT: vsub.i64 q8, q5, q8
; CHECK-FP16-NEXT: vmovn.i64 d1, q9
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
; CHECK-FP16-NEXT: vpop {d8}
; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13}
; CHECK-FP16-NEXT: pop {r4, r5, r6, pc}
entry:
%conv = fptoui <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
%conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
ret <4 x i32> %conv6
}
; Signed conversion saturated to the unsigned i32 range, for <4 x half>:
; every lane is converted to i64 with fptosi, clamped to [0, 4294967295] by
; llvm.smin followed by llvm.smax, then truncated to i32.
; Expected code: each lane goes through a signed 64-bit conversion libcall
; (__aeabi_h2f + __aeabi_f2lz on the armv7a+neon run, __fixhfdi on the
; fullfp16 run) and is clamped with conditional moves in core registers before
; the four results are inserted into d0/d1.
define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
; CHECK-NEON-NEXT: vmov r0, s3
; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: vmov r2, s20
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvn r3, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: movmi r3, r0
; CHECK-NEON-NEXT: movpl r1, r6
; CHECK-NEON-NEXT: moveq r3, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: vmov r8, s18
; CHECK-NEON-NEXT: movwgt r7, #1
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: movne r7, r3
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: moveq r7, r3
; CHECK-NEON-NEXT: mov r0, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvn r2, #0
; CHECK-NEON-NEXT: movmi r2, r0
; CHECK-NEON-NEXT: movpl r1, r6
; CHECK-NEON-NEXT: moveq r2, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: mov r0, r8
; CHECK-NEON-NEXT: movwgt r4, #1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: movne r4, r2
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r4, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvn r2, #0
; CHECK-NEON-NEXT: movmi r2, r0
; CHECK-NEON-NEXT: movpl r1, r6
; CHECK-NEON-NEXT: moveq r2, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movwgt r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: movne r5, r2
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r5, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: movmi r9, r0
; CHECK-NEON-NEXT: movpl r1, r6
; CHECK-NEON-NEXT: moveq r9, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movwgt r6, #1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: vmov.32 d0[0], r4
; CHECK-NEON-NEXT: movne r6, r9
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r7
; CHECK-NEON-NEXT: moveq r6, r9
; CHECK-NEON-NEXT: vmov.32 d0[1], r6
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9}
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.u16 r2, d8[1]
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: vmov.u16 r7, d8[0]
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: vmov.u16 r3, d8[2]
; CHECK-FP16-NEXT: movpl r1, r5
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: mvn r8, #0
; CHECK-FP16-NEXT: vmov s16, r2
; CHECK-FP16-NEXT: mvn r2, #0
; CHECK-FP16-NEXT: movmi r2, r0
; CHECK-FP16-NEXT: vmov s0, r7
; CHECK-FP16-NEXT: moveq r2, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movwgt r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: movne r6, r2
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: vmov s18, r3
; CHECK-FP16-NEXT: moveq r6, r2
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvn r2, #0
; CHECK-FP16-NEXT: movpl r1, r5
; CHECK-FP16-NEXT: movmi r2, r0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: moveq r2, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movwgt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: movne r7, r2
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r7, r2
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.f32 s0, s16
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvn r2, #0
; CHECK-FP16-NEXT: movpl r1, r5
; CHECK-FP16-NEXT: movmi r2, r0
; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: moveq r2, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movwgt r4, #1
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: movne r4, r2
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r4, r2
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: vmov.32 d1[0], r4
; CHECK-FP16-NEXT: movmi r8, r0
; CHECK-FP16-NEXT: movpl r1, r5
; CHECK-FP16-NEXT: moveq r8, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movwgt r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: vmov.32 d0[0], r7
; CHECK-FP16-NEXT: movne r5, r8
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r6
; CHECK-FP16-NEXT: moveq r5, r8
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
; CHECK-FP16-NEXT: vpop {d8, d9}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc}
entry:
%conv = fptosi <4 x half> %x to <4 x i64>
%spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
%spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
%conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
ret <4 x i32> %conv6
}
; i16 saturate
; Signed saturation of <2 x double> to <2 x i16>, expressed with min/max
; intrinsics: fptosi to i32, clamp to [-32768, 32767] with llvm.smin then
; llvm.smax, truncate to i16.
; Both RUN configurations expect the same code: one scalar vcvt.s32.f64 per
; lane, the two results gathered into d16, then vmin.s32 / vmax.s32 against
; splatted 0x7fff and ~0x7fff constants. No libcalls are expected.
define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-LABEL: stest_f64i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.s32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0x7fff
; CHECK-NEXT: vmvn.i32 d18, #0x7fff
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.s32 d16, d16, d17
; CHECK-NEXT: vmax.s32 d0, d16, d18
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
%spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>)
%conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
ret <2 x i16> %conv6
}
; Unsigned saturation of <2 x double> to <2 x i16>, expressed with llvm.umin:
; fptoui to i32, clamp to at most 65535, truncate to i16.
; Both RUN configurations expect the same code: one scalar vcvt.u32.f64 per
; lane, the results gathered into d16, then a single vmin.u32 against a
; splatted 0xffff constant.
define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.u32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0xffff
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.u32 d0, d16, d17
; CHECK-NEXT: bx lr
entry:
%conv = fptoui <2 x double> %x to <2 x i32>
%spec.store.select = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
%conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
ret <2 x i16> %conv6
}
; Signed conversion saturated to the unsigned i16 range, for <2 x double>:
; fptosi to i32, clamp to [0, 65535] with llvm.smin then llvm.smax, truncate
; to i16.
; Both RUN configurations expect the same code: one scalar vcvt.s32.f64 per
; lane, then vmin.s32 against splatted 0xffff and vmax.s32 against zero.
define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.s32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0xffff
; CHECK-NEXT: vmov.i32 d18, #0x0
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.s32 d16, d16, d17
; CHECK-NEXT: vmax.s32 d0, d16, d18
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
%spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
%spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> zeroinitializer)
%conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
ret <2 x i16> %conv6
}
; Signed saturation of <4 x float> to <4 x i16>, expressed with min/max
; intrinsics: fptosi to i32, clamp to [-32768, 32767] with llvm.smin then
; llvm.smax, truncate to i16.
; Both RUN configurations expect fully vectorised code: a q-register
; vcvt.s32.f32, vmin.s32 / vmax.s32 against splatted constants, and a
; narrowing vmovn.i32.
define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: stest_f32i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0x7fff
; CHECK-NEXT: vmvn.i32 q10, #0x7fff
; CHECK-NEXT: vmin.s32 q8, q8, q9
; CHECK-NEXT: vmax.s32 q8, q8, q10
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
%spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
%spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
%conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
ret <4 x i16> %conv6
}
; Unsigned saturation of <4 x float> to <4 x i16>, expressed with llvm.umin:
; fptoui to i32, clamp to at most 65535, truncate to i16.
; Both RUN configurations expect fully vectorised code: a q-register
; vcvt.u32.f32, one vmin.u32 against splatted 0xffff, and a narrowing
; vmovn.i32.
define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vmin.u32 q8, q8, q9
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
%conv = fptoui <4 x float> %x to <4 x i32>
%spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
%conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
ret <4 x i16> %conv6
}
; Signed conversion saturated to the unsigned i16 range, for <4 x float>:
; fptosi to i32, clamp to [0, 65535] with llvm.smin then llvm.smax, truncate
; to i16.
; Both RUN configurations expect fully vectorised code: a q-register
; vcvt.s32.f32, vmin.s32 against splatted 0xffff, vmax.s32 against zero, and
; a narrowing vmovn.i32.
define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vmov.i32 q10, #0x0
; CHECK-NEXT: vmin.s32 q8, q8, q9
; CHECK-NEXT: vmax.s32 q8, q8, q10
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
%conv = fptosi <4 x float> %x to <4 x i32>
%spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
%spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
%conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
ret <4 x i16> %conv6
}
; Signed saturation of <8 x half> to <8 x i16>, expressed with min/max
; intrinsics: fptosi to i32, clamp to [-32768, 32767] with llvm.smin then
; llvm.smax, truncate to i16.
; Expected code differs per RUN configuration:
;  - armv7a+neon: every lane is widened with a __aeabi_h2f libcall and
;    converted with a scalar vcvt.s32.f32;
;  - fullfp16: lanes are extracted with vmovx.f16 and converted with
;    vcvt.s32.f16, with no libcalls and no stack frame.
; In both cases the eight i32 results are clamped as two q registers with
; vmin.s32 / vmax.s32 and narrowed with vmovn.i32 into d0 and d1.
define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i16_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s7
; CHECK-NEON-NEXT: vmov.f32 s18, s6
; CHECK-NEON-NEXT: vmov.f32 s20, s5
; CHECK-NEON-NEXT: vmov.f32 s22, s4
; CHECK-NEON-NEXT: vmov.f32 s24, s3
; CHECK-NEON-NEXT: vmov.f32 s26, s2
; CHECK-NEON-NEXT: vmov.f32 s28, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s26
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s22
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s24
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov s22, r7
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s30, r6
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[1], r0
; CHECK-NEON-NEXT: vmov r0, s28
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r1, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r5
; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r4
; CHECK-NEON-NEXT: vmov.i32 q8, #0x7fff
; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
; CHECK-NEON-NEXT: vmvn.i32 q9, #0x7fff
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
; CHECK-NEON-NEXT: vmov.32 d12[1], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
; CHECK-NEON-NEXT: vmov.32 d9[1], r0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: vmovn.i32 d1, q10
; CHECK-NEON-NEXT: vmov.32 d8[1], r0
; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
; CHECK-NEON-NEXT: vmovn.i32 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i16_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
; CHECK-FP16-NEXT: vmovx.f16 s10, s3
; CHECK-FP16-NEXT: vmovx.f16 s8, s2
; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
; CHECK-FP16-NEXT: vmovx.f16 s6, s1
; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
; CHECK-FP16-NEXT: vmov.i32 q10, #0x7fff
; CHECK-FP16-NEXT: vmvn.i32 q11, #0x7fff
; CHECK-FP16-NEXT: vmov.32 d17[0], r0
; CHECK-FP16-NEXT: vmov r0, s5
; CHECK-FP16-NEXT: vmov.32 d16[0], r0
; CHECK-FP16-NEXT: vmov r0, s14
; CHECK-FP16-NEXT: vmov.32 d19[0], r0
; CHECK-FP16-NEXT: vmov r0, s12
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmov r0, s10
; CHECK-FP16-NEXT: vmov.32 d17[1], r0
; CHECK-FP16-NEXT: vmov r0, s8
; CHECK-FP16-NEXT: vmov.32 d16[1], r0
; CHECK-FP16-NEXT: vmov r0, s6
; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
; CHECK-FP16-NEXT: vmovn.i32 d1, q8
; CHECK-FP16-NEXT: vmov.32 d19[1], r0
; CHECK-FP16-NEXT: vmov r0, s4
; CHECK-FP16-NEXT: vmov.32 d18[1], r0
; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
; CHECK-FP16-NEXT: vmovn.i32 d0, q9
; CHECK-FP16-NEXT: bx lr
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
%spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
%spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
%conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
ret <8 x i16> %conv6
}
; Unsigned saturation of <8 x half> to <8 x i16>, expressed with llvm.umin:
; fptoui to i32, clamp to at most 65535, truncate to i16.
; Expected code differs per RUN configuration:
;  - armv7a+neon: every lane is widened with a __aeabi_h2f libcall and
;    converted with a scalar vcvt.u32.f32;
;  - fullfp16: lanes are extracted with vmovx.f16 and converted with
;    vcvt.u32.f16, with no libcalls and no stack frame.
; In both cases the eight i32 results are clamped as two q registers with
; vmin.u32 against splatted 0xffff and narrowed with vmovn.i32.
define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i16_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s7
; CHECK-NEON-NEXT: vmov.f32 s18, s6
; CHECK-NEON-NEXT: vmov.f32 s20, s5
; CHECK-NEON-NEXT: vmov.f32 s22, s4
; CHECK-NEON-NEXT: vmov.f32 s24, s3
; CHECK-NEON-NEXT: vmov.f32 s26, s2
; CHECK-NEON-NEXT: vmov.f32 s28, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s26
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s22
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s24
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov s16, r7
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s18, r6
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[1], r0
; CHECK-NEON-NEXT: vmov r0, s28
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r1, s20
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r5
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s18
; CHECK-NEON-NEXT: vcvt.u32.f32 s18, s2
; CHECK-NEON-NEXT: vmov.32 d10[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r4
; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEON-NEXT: vcvt.u32.f32 s2, s2
; CHECK-NEON-NEXT: vmov.32 d11[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s16
; CHECK-NEON-NEXT: vmov.32 d12[1], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmin.u32 q9, q6, q8
; CHECK-NEON-NEXT: vmov.32 d11[1], r0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: vmovn.i32 d1, q9
; CHECK-NEON-NEXT: vmov.32 d10[1], r0
; CHECK-NEON-NEXT: vmin.u32 q8, q5, q8
; CHECK-NEON-NEXT: vmovn.i32 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i16_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s0, s3
; CHECK-FP16-NEXT: vcvt.u32.f16 s5, s2
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s14, s1
; CHECK-FP16-NEXT: vmovx.f16 s10, s3
; CHECK-FP16-NEXT: vmovx.f16 s8, s2
; CHECK-FP16-NEXT: vcvt.u32.f16 s10, s10
; CHECK-FP16-NEXT: vcvt.u32.f16 s8, s8
; CHECK-FP16-NEXT: vmovx.f16 s6, s1
; CHECK-FP16-NEXT: vcvt.u32.f16 s4, s4
; CHECK-FP16-NEXT: vcvt.u32.f16 s6, s6
; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
; CHECK-FP16-NEXT: vmov.32 d17[0], r0
; CHECK-FP16-NEXT: vmov r0, s5
; CHECK-FP16-NEXT: vmov.32 d16[0], r0
; CHECK-FP16-NEXT: vmov r0, s14
; CHECK-FP16-NEXT: vmov.32 d19[0], r0
; CHECK-FP16-NEXT: vmov r0, s12
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmov r0, s10
; CHECK-FP16-NEXT: vmov.32 d17[1], r0
; CHECK-FP16-NEXT: vmov r0, s8
; CHECK-FP16-NEXT: vmov.32 d16[1], r0
; CHECK-FP16-NEXT: vmov r0, s6
; CHECK-FP16-NEXT: vmin.u32 q8, q8, q10
; CHECK-FP16-NEXT: vmovn.i32 d1, q8
; CHECK-FP16-NEXT: vmov.32 d19[1], r0
; CHECK-FP16-NEXT: vmov r0, s4
; CHECK-FP16-NEXT: vmov.32 d18[1], r0
; CHECK-FP16-NEXT: vmin.u32 q9, q9, q10
; CHECK-FP16-NEXT: vmovn.i32 d0, q9
; CHECK-FP16-NEXT: bx lr
entry:
%conv = fptoui <8 x half> %x to <8 x i32>
%spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
%conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
ret <8 x i16> %conv6
}
; Signed conversion saturated to the unsigned i16 range, for <8 x half>:
; fptosi to i32, clamp to [0, 65535] with llvm.smin then llvm.smax, truncate
; to i16.
; Expected code differs per RUN configuration:
;  - armv7a+neon: every lane is widened with a __aeabi_h2f libcall and
;    converted with a scalar vcvt.s32.f32;
;  - fullfp16: lanes are extracted with vmovx.f16 and converted with
;    vcvt.s32.f16, with no libcalls and no stack frame.
; In both cases the eight i32 results are clamped as two q registers with
; vmin.s32 against splatted 0xffff and vmax.s32 against zero, then narrowed
; with vmovn.i32.
define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i16_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s7
; CHECK-NEON-NEXT: vmov.f32 s18, s6
; CHECK-NEON-NEXT: vmov.f32 s20, s5
; CHECK-NEON-NEXT: vmov.f32 s22, s4
; CHECK-NEON-NEXT: vmov.f32 s24, s3
; CHECK-NEON-NEXT: vmov.f32 s26, s2
; CHECK-NEON-NEXT: vmov.f32 s28, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s26
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s22
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: vmov r0, s24
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r7, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[0], r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov s22, r7
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s30, r6
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d13[1], r0
; CHECK-NEON-NEXT: vmov r0, s28
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r1, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r5
; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.32 d12[0], r0
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
; CHECK-NEON-NEXT: vmov s2, r4
; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
; CHECK-NEON-NEXT: vmov.i32 q9, #0x0
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
; CHECK-NEON-NEXT: vmov.32 d12[1], r0
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
; CHECK-NEON-NEXT: vmov.32 d9[1], r0
; CHECK-NEON-NEXT: vmov r0, s2
; CHECK-NEON-NEXT: vmovn.i32 d1, q10
; CHECK-NEON-NEXT: vmov.32 d8[1], r0
; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
; CHECK-NEON-NEXT: vmovn.i32 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i16_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
; CHECK-FP16-NEXT: vmovx.f16 s10, s3
; CHECK-FP16-NEXT: vmovx.f16 s8, s2
; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
; CHECK-FP16-NEXT: vmovx.f16 s6, s1
; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
; CHECK-FP16-NEXT: vmov.i32 q11, #0x0
; CHECK-FP16-NEXT: vmov.32 d17[0], r0
; CHECK-FP16-NEXT: vmov r0, s5
; CHECK-FP16-NEXT: vmov.32 d16[0], r0
; CHECK-FP16-NEXT: vmov r0, s14
; CHECK-FP16-NEXT: vmov.32 d19[0], r0
; CHECK-FP16-NEXT: vmov r0, s12
; CHECK-FP16-NEXT: vmov.32 d18[0], r0
; CHECK-FP16-NEXT: vmov r0, s10
; CHECK-FP16-NEXT: vmov.32 d17[1], r0
; CHECK-FP16-NEXT: vmov r0, s8
; CHECK-FP16-NEXT: vmov.32 d16[1], r0
; CHECK-FP16-NEXT: vmov r0, s6
; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
; CHECK-FP16-NEXT: vmovn.i32 d1, q8
; CHECK-FP16-NEXT: vmov.32 d19[1], r0
; CHECK-FP16-NEXT: vmov r0, s4
; CHECK-FP16-NEXT: vmov.32 d18[1], r0
; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
; CHECK-FP16-NEXT: vmovn.i32 d0, q9
; CHECK-FP16-NEXT: bx lr
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
%spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
%spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> zeroinitializer)
%conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
ret <8 x i16> %conv6
}
; i64 saturate
; Signed saturating conversion of <2 x double> to <2 x i64>, expressed with
; min/max intrinsics. Each lane is converted to i128, clamped to
; [-2^63, 2^63 - 1] and truncated to i64.
; The assertions below use the prefix shared by both RUN lines. They expect one
; __fixdfti libcall per lane, a scalar compare / conditional-move clamp, and the
; result rebuilt in d0/d1 one 32-bit half at a time.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: stest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r0, r3
; CHECK-NEXT: mov r10, #0
; CHECK-NEXT: andne r0, r2, r0, asr #31
; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: movmi r10, r3
; CHECK-NEXT: and r1, r0, r10
; CHECK-NEXT: cmn r11, #-2147483647
; CHECK-NEXT: mvn r0, #-2147483648
; CHECK-NEXT: movlo r0, r11
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mvn r8, #-2147483648
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: movmi r8, r11
; CHECK-NEXT: orrs r2, r2, r3
; CHECK-NEXT: moveq r8, r0
; CHECK-NEXT: cmn r10, #1
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: mov r9, #-2147483648
; CHECK-NEXT: movgt r0, r8
; CHECK-NEXT: cmp r8, #-2147483648
; CHECK-NEXT: movhi r9, r8
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: mvn r7, #-2147483648
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: movne r9, r0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: cmn r1, #-2147483647
; CHECK-NEXT: mvn r5, #0
; CHECK-NEXT: movlo r5, r0
; CHECK-NEXT: mvn r4, #0
; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movpl r0, r4
; CHECK-NEXT: orrs r12, r2, r3
; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: cmn r1, #-2147483647
; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r7, r1
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: moveq r7, r5
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: movhi r1, r0
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: moveq r1, r0
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvn r6, #0
; CHECK-NEXT: movmi r6, r5
; CHECK-NEXT: cmn r11, #-2147483647
; CHECK-NEXT: movlo r4, r5
; CHECK-NEXT: moveq r4, r5
; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movne r4, r6
; CHECK-NEXT: cmp r8, #-2147483648
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movhi r6, r4
; CHECK-NEXT: moveq r6, r4
; CHECK-NEXT: cmn r10, #1
; CHECK-NEXT: movle r4, r12
; CHECK-NEXT: cmn r5, #1
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov.32 d1[0], r4
; CHECK-NEXT: movmi r6, r3
; CHECK-NEXT: cmn r6, #1
; CHECK-NEXT: movle r0, r12
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: andne r3, r2, r3, asr #31
; CHECK-NEXT: and r2, r3, r6
; CHECK-NEXT: cmn r2, #1
; CHECK-NEXT: moveq r0, r1
; CHECK-NEXT: cmn r6, #1
; CHECK-NEXT: mov r1, #-2147483648
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: movgt r1, r7
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: vmov.32 d1[1], r9
; CHECK-NEXT: movls r7, r0
; CHECK-NEXT: cmn r2, #1
; CHECK-NEXT: movne r7, r1
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
; Convert each lane to a signed 128-bit integer.
%conv = fptosi <2 x double> %x to <2 x i128>
; Upper clamp. 9223372036854775807 is 2^63 - 1, the largest signed i64.
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
; Lower clamp. -9223372036854775808 is -2^63, the smallest signed i64.
%spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
; Every clamped lane now fits in a signed i64, so the truncation is lossless.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; Unsigned saturating conversion of <2 x double> to <2 x i64>, expressed with a
; umin intrinsic. Each lane is converted to an unsigned i128, limited from above
; and truncated to i64.
; The assertions below use the prefix shared by both RUN lines and expect one
; __fixunsdfti libcall per lane followed by a compare / conditional-move
; sequence on the upper 64 bits of the i128 result (r2, r3).
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: eor r1, r2, #1
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: orr r1, r1, r3
; CHECK-NEXT: movwlo r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r7, r6
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vorr d0, d8, d8
; CHECK-NEXT: moveq r7, r1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movne r6, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: moveq r6, r1
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: eor r4, r2, #1
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: orr r4, r4, r3
; CHECK-NEXT: movwlo r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: moveq r0, r4
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: movne r5, r1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: moveq r5, r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Convert each lane to an unsigned 128-bit integer.
%conv = fptoui <2 x double> %x to <2 x i128>
; Upper limit only. 18446744073709551616 is 2^64.
; NOTE(review) - 2^64 does not fit in 64 bits, so a lane limited to this bound
; truncates to 0 below. A saturation to the largest unsigned i64 would use
; 2^64 - 1. Confirm the bound is intentional before changing it, because the
; assertions above were generated from the current constant.
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; Keep the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
ret <2 x i64> %conv6
}
; Signed-to-unsigned-range saturating conversion of <2 x double> to <2 x i64>,
; expressed with min/max intrinsics. Each lane is converted to a signed i128,
; limited from above with smin, floored at zero with smax and truncated to i64.
; The assertions below use the prefix shared by both RUN lines and expect one
; __fixdfti libcall per lane, with the clamp done by scalar compares and
; conditional moves.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i64_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: subs r7, r2, #1
; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: eor r0, r2, #1
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: moveq r10, r5
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r10, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: movne r5, r1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, #1
; CHECK-NEXT: movne r0, r10
; CHECK-NEXT: mov r8, #1
; CHECK-NEXT: moveq r0, r10
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: movlo r1, r2
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movpl r2, r8
; CHECK-NEXT: mov r11, #0
; CHECK-NEXT: moveq r2, r1
; CHECK-NEXT: movpl r3, r11
; CHECK-NEXT: rsbs r1, r2, #0
; CHECK-NEXT: vorr d0, d9, d9
; CHECK-NEXT: rscs r1, r3, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: moveq r10, r7
; CHECK-NEXT: orrs r9, r2, r3
; CHECK-NEXT: moveq r10, r0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: eor r4, r2, #1
; CHECK-NEXT: orr r6, r4, r3
; CHECK-NEXT: subs r4, r2, #1
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: moveq r0, r4
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r0, r6
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movne r4, r1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r6, #1
; CHECK-NEXT: movne r1, r0
; CHECK-NEXT: moveq r1, r0
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: movlo r6, r2
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r8, r2
; CHECK-NEXT: movpl r3, r11
; CHECK-NEXT: moveq r8, r6
; CHECK-NEXT: rsbs r2, r8, #0
; CHECK-NEXT: rscs r2, r3, #0
; CHECK-NEXT: movwlt r11, #1
; CHECK-NEXT: cmp r11, #0
; CHECK-NEXT: moveq r0, r11
; CHECK-NEXT: orrs r2, r8, r3
; CHECK-NEXT: moveq r0, r1
; CHECK-NEXT: cmp r11, #0
; CHECK-NEXT: movne r11, r4
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov.32 d1[0], r0
; CHECK-NEXT: moveq r11, r4
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vmov.32 d0[0], r10
; CHECK-NEXT: movne r7, r5
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: vmov.32 d1[1], r11
; CHECK-NEXT: moveq r7, r5
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
; Convert each lane to a signed 128-bit integer.
%conv = fptosi <2 x double> %x to <2 x i128>
; Upper clamp. 18446744073709551616 is 2^64.
; NOTE(review) - 2^64 does not fit in 64 bits, so a lane clamped to this bound
; truncates to 0 below. A saturation to the largest unsigned i64 would use
; 2^64 - 1. Confirm the bound is intentional before changing it, because the
; assertions above were generated from the current constant.
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; Lower clamp at zero, so negative inputs produce 0.
%spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
; Keep the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; Signed saturating conversion of <2 x float> to <2 x i64>, expressed with
; min/max intrinsics. Each lane is converted to i128, clamped to
; [-2^63, 2^63 - 1] and truncated to i64.
; The assertions below use the prefix shared by both RUN lines. They expect the
; two float lanes (s17, then s16) to be passed in s0 to one __fixsfti libcall
; each, followed by a scalar compare / conditional-move clamp.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: stest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r0, r3
; CHECK-NEXT: mov r10, #0
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: andne r0, r2, r0, asr #31
; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: movmi r10, r3
; CHECK-NEXT: and r1, r0, r10
; CHECK-NEXT: cmn r11, #-2147483647
; CHECK-NEXT: mvn r0, #-2147483648
; CHECK-NEXT: mvn r8, #-2147483648
; CHECK-NEXT: movlo r0, r11
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r8, r11
; CHECK-NEXT: orrs r2, r2, r3
; CHECK-NEXT: moveq r8, r0
; CHECK-NEXT: cmn r10, #1
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: mov r9, #-2147483648
; CHECK-NEXT: movgt r0, r8
; CHECK-NEXT: cmp r8, #-2147483648
; CHECK-NEXT: movhi r9, r8
; CHECK-NEXT: cmn r1, #1
; CHECK-NEXT: mov r6, r3
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: mvn r7, #-2147483648
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: movne r9, r0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: cmn r1, #-2147483647
; CHECK-NEXT: mvn r5, #0
; CHECK-NEXT: movlo r5, r0
; CHECK-NEXT: mvn r4, #0
; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movpl r0, r4
; CHECK-NEXT: orrs r12, r2, r3
; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: cmn r1, #-2147483647
; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: movlo r5, r1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r7, r1
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: moveq r7, r5
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: movhi r1, r0
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: moveq r1, r0
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvn r6, #0
; CHECK-NEXT: movmi r6, r5
; CHECK-NEXT: cmn r11, #-2147483647
; CHECK-NEXT: movlo r4, r5
; CHECK-NEXT: moveq r4, r5
; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movne r4, r6
; CHECK-NEXT: cmp r8, #-2147483648
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movhi r6, r4
; CHECK-NEXT: moveq r6, r4
; CHECK-NEXT: cmn r10, #1
; CHECK-NEXT: movle r4, r12
; CHECK-NEXT: cmn r5, #1
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov.32 d1[0], r4
; CHECK-NEXT: movmi r6, r3
; CHECK-NEXT: cmn r6, #1
; CHECK-NEXT: movle r0, r12
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: andne r3, r2, r3, asr #31
; CHECK-NEXT: and r2, r3, r6
; CHECK-NEXT: cmn r2, #1
; CHECK-NEXT: moveq r0, r1
; CHECK-NEXT: cmn r6, #1
; CHECK-NEXT: mov r1, #-2147483648
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: movgt r1, r7
; CHECK-NEXT: cmp r7, #-2147483648
; CHECK-NEXT: mov r0, #-2147483648
; CHECK-NEXT: vmov.32 d1[1], r9
; CHECK-NEXT: movls r7, r0
; CHECK-NEXT: cmn r2, #1
; CHECK-NEXT: movne r7, r1
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
; Convert each lane to a signed 128-bit integer.
%conv = fptosi <2 x float> %x to <2 x i128>
; Upper clamp. 9223372036854775807 is 2^63 - 1, the largest signed i64.
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
; Lower clamp. -9223372036854775808 is -2^63, the smallest signed i64.
%spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
; Every clamped lane now fits in a signed i64, so the truncation is lossless.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; Unsigned saturating conversion of <2 x float> to <2 x i64>, expressed with a
; umin intrinsic. Each lane is converted to an unsigned i128, limited from above
; and truncated to i64.
; The assertions below use the prefix shared by both RUN lines and expect one
; __fixunssfti libcall per lane followed by a compare / conditional-move
; sequence on the upper 64 bits of the i128 result (r2, r3).
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: utest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vmov.f32 s0, s16
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: eor r1, r2, #1
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movwlo r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: orr r1, r1, r3
; CHECK-NEXT: moveq r7, r6
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: moveq r7, r1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: movne r6, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: moveq r6, r1
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: eor r4, r2, #1
; CHECK-NEXT: subs r2, r2, #1
; CHECK-NEXT: sbcs r2, r3, #0
; CHECK-NEXT: orr r4, r4, r3
; CHECK-NEXT: movwlo r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: moveq r0, r5
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: moveq r0, r4
; CHECK-NEXT: vmov.32 d1[0], r6
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 d0[0], r0
; CHECK-NEXT: movne r5, r1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: vmov.32 d1[1], r7
; CHECK-NEXT: moveq r5, r4
; CHECK-NEXT: vmov.32 d0[1], r5
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Convert each lane to an unsigned 128-bit integer.
%conv = fptoui <2 x float> %x to <2 x i128>
; Upper limit only. 18446744073709551616 is 2^64.
; NOTE(review) - 2^64 does not fit in 64 bits, so a lane limited to this bound
; truncates to 0 below. A saturation to the largest unsigned i64 would use
; 2^64 - 1. Confirm the bound is intentional before changing it, because the
; assertions above were generated from the current constant.
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; Keep the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
ret <2 x i64> %conv6
}
; Signed-to-unsigned-range saturating conversion of <2 x float> to <2 x i64>,
; expressed with min/max intrinsics. Each lane is converted to a signed i128,
; limited from above with smin, floored at zero with smax and truncated to i64.
; The assertions below use the prefix shared by both RUN lines and expect one
; __fixsfti libcall per lane, with the clamp done by scalar compares and
; conditional moves.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-LABEL: ustest_f32i64_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8}
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vmov.f64 d8, d0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: subs r7, r2, #1
; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: eor r0, r2, #1
; CHECK-NEXT: sbcs r7, r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: orr r0, r0, r3
; CHECK-NEXT: movwlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: moveq r10, r5
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r10, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: movne r5, r1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r5, r0
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: movne r0, r10
; CHECK-NEXT: mov r1, #1
; CHECK-NEXT: moveq r0, r10
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: movlo r1, r2
; CHECK-NEXT: mov r8, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r11, #0
; CHECK-NEXT: movpl r2, r8
; CHECK-NEXT: movpl r3, r11
; CHECK-NEXT: moveq r2, r1
; CHECK-NEXT: rsbs r1, r2, #0
; CHECK-NEXT: rscs r1, r3, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movwlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: moveq r10, r7
; CHECK-NEXT: orrs r9, r2, r3
; CHECK-NEXT: moveq r10, r0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: eor r4, r2, #1
; CHECK-NEXT: orr r6, r4, r3
; CHECK-NEXT: subs r4, r2, #1
; CHECK-NEXT: sbcs r4, r3, #0
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movwlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: moveq r0, r4
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r0, r6
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movne r4, r1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: mov r6, #1
; CHECK-NEXT: movne r1, r0
; CHECK-NEXT: moveq r1, r0
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: movlo r6, r2
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: movmi r8, r2
; CHECK-NEXT: movpl r3, r11
; CHECK-NEXT: moveq r8, r6
; CHECK-NEXT: rsbs r2, r8, #0
; CHECK-NEXT: rscs r2, r3, #0
; CHECK-NEXT: movwlt r11, #1
; CHECK-NEXT: cmp r11, #0
; CHECK-NEXT: moveq r0, r11
; CHECK-NEXT: orrs r2, r8, r3
; CHECK-NEXT: moveq r0, r1
; CHECK-NEXT: cmp r11, #0
; CHECK-NEXT: movne r11, r4
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov.32 d1[0], r0
; CHECK-NEXT: moveq r11, r4
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vmov.32 d0[0], r10
; CHECK-NEXT: movne r7, r5
; CHECK-NEXT: cmp r9, #0
; CHECK-NEXT: vmov.32 d1[1], r11
; CHECK-NEXT: moveq r7, r5
; CHECK-NEXT: vmov.32 d0[1], r7
; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
; Convert each lane to a signed 128-bit integer.
%conv = fptosi <2 x float> %x to <2 x i128>
; Upper clamp. 18446744073709551616 is 2^64.
; NOTE(review) - 2^64 does not fit in 64 bits, so a lane clamped to this bound
; truncates to 0 below. A saturation to the largest unsigned i64 would use
; 2^64 - 1. Confirm the bound is intentional before changing it, because the
; assertions above were generated from the current constant.
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; Lower clamp at zero, so negative inputs produce 0.
%spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
; Keep the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; Signed saturating conversion of <2 x half> to <2 x i64>, expressed with
; min/max intrinsics. Each lane is converted to i128, clamped to
; [-2^63, 2^63 - 1] and truncated to i64.
; The two RUN configurations produce different code, so each has its own set of
; assertions below.
;  - NEON-only run (armv7a, +neon) expects each half lane to be widened with
;    __aeabi_h2f and then converted with __fixsfti.
;  - fullfp16 run (armv8a, +neon,+fullfp16) expects each half lane to be passed
;    straight to __fixhfti.
; In both cases the clamp is done with scalar compares and conditional moves.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: .pad #16
; CHECK-NEON-NEXT: sub sp, sp, #16
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mov r0, r3
; CHECK-NEON-NEXT: mov r10, #0
; CHECK-NEON-NEXT: andne r0, r2, r0, asr #31
; CHECK-NEON-NEXT: mov r11, r1
; CHECK-NEON-NEXT: movmi r10, r3
; CHECK-NEON-NEXT: and r1, r0, r10
; CHECK-NEON-NEXT: cmn r11, #-2147483647
; CHECK-NEON-NEXT: mvn r0, #-2147483648
; CHECK-NEON-NEXT: movlo r0, r11
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mvn r8, #-2147483648
; CHECK-NEON-NEXT: mov r9, #-2147483648
; CHECK-NEON-NEXT: movmi r8, r11
; CHECK-NEON-NEXT: orrs r2, r2, r3
; CHECK-NEON-NEXT: moveq r8, r0
; CHECK-NEON-NEXT: cmn r10, #1
; CHECK-NEON-NEXT: mov r0, #-2147483648
; CHECK-NEON-NEXT: mov r6, r3
; CHECK-NEON-NEXT: movgt r0, r8
; CHECK-NEON-NEXT: cmp r8, #-2147483648
; CHECK-NEON-NEXT: movhi r9, r8
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movne r9, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEON-NEXT: mvn r7, #-2147483648
; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: cmn r1, #-2147483647
; CHECK-NEON-NEXT: mvn r5, #0
; CHECK-NEON-NEXT: movlo r5, r0
; CHECK-NEON-NEXT: mvn r4, #0
; CHECK-NEON-NEXT: moveq r5, r0
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movpl r0, r4
; CHECK-NEON-NEXT: orrs r12, r2, r3
; CHECK-NEON-NEXT: moveq r0, r5
; CHECK-NEON-NEXT: cmn r1, #-2147483647
; CHECK-NEON-NEXT: mvn r5, #-2147483648
; CHECK-NEON-NEXT: movlo r5, r1
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movmi r7, r1
; CHECK-NEON-NEXT: cmp r12, #0
; CHECK-NEON-NEXT: moveq r7, r5
; CHECK-NEON-NEXT: cmp r7, #-2147483648
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
; CHECK-NEON-NEXT: movhi r1, r0
; CHECK-NEON-NEXT: mov r12, #0
; CHECK-NEON-NEXT: moveq r1, r0
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: mvn r6, #0
; CHECK-NEON-NEXT: movmi r6, r5
; CHECK-NEON-NEXT: cmn r11, #-2147483647
; CHECK-NEON-NEXT: movlo r4, r5
; CHECK-NEON-NEXT: moveq r4, r5
; CHECK-NEON-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-NEON-NEXT: movne r4, r6
; CHECK-NEON-NEXT: cmp r8, #-2147483648
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: movhi r6, r4
; CHECK-NEON-NEXT: moveq r6, r4
; CHECK-NEON-NEXT: cmn r10, #1
; CHECK-NEON-NEXT: movle r4, r12
; CHECK-NEON-NEXT: cmn r5, #1
; CHECK-NEON-NEXT: moveq r4, r6
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: vmov.32 d1[0], r4
; CHECK-NEON-NEXT: movmi r6, r3
; CHECK-NEON-NEXT: cmn r6, #1
; CHECK-NEON-NEXT: movle r0, r12
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: andne r3, r2, r3, asr #31
; CHECK-NEON-NEXT: and r2, r3, r6
; CHECK-NEON-NEXT: cmn r2, #1
; CHECK-NEON-NEXT: moveq r0, r1
; CHECK-NEON-NEXT: cmn r6, #1
; CHECK-NEON-NEXT: mov r1, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
; CHECK-NEON-NEXT: movgt r1, r7
; CHECK-NEON-NEXT: cmp r7, #-2147483648
; CHECK-NEON-NEXT: mov r0, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d1[1], r9
; CHECK-NEON-NEXT: movls r7, r0
; CHECK-NEON-NEXT: cmn r2, #1
; CHECK-NEON-NEXT: movne r7, r1
; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: add sp, sp, #16
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FP16-NEXT: .pad #4
; CHECK-FP16-NEXT: sub sp, sp, #4
; CHECK-FP16-NEXT: .vsave {d8}
; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: .pad #16
; CHECK-FP16-NEXT: sub sp, sp, #16
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mov r0, r3
; CHECK-FP16-NEXT: mov r10, #0
; CHECK-FP16-NEXT: andne r0, r2, r0, asr #31
; CHECK-FP16-NEXT: mov r11, r1
; CHECK-FP16-NEXT: movmi r10, r3
; CHECK-FP16-NEXT: and r1, r0, r10
; CHECK-FP16-NEXT: cmn r11, #-2147483647
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: movlo r0, r11
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mvn r8, #-2147483648
; CHECK-FP16-NEXT: mov r9, #-2147483648
; CHECK-FP16-NEXT: movmi r8, r11
; CHECK-FP16-NEXT: orrs r2, r2, r3
; CHECK-FP16-NEXT: moveq r8, r0
; CHECK-FP16-NEXT: cmn r10, #1
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: mov r6, r3
; CHECK-FP16-NEXT: movgt r0, r8
; CHECK-FP16-NEXT: cmp r8, #-2147483648
; CHECK-FP16-NEXT: movhi r9, r8
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: movne r9, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-FP16-NEXT: mvn r7, #-2147483648
; CHECK-FP16-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: cmn r1, #-2147483647
; CHECK-FP16-NEXT: mvn r5, #0
; CHECK-FP16-NEXT: movlo r5, r0
; CHECK-FP16-NEXT: mvn r4, #0
; CHECK-FP16-NEXT: moveq r5, r0
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movpl r0, r4
; CHECK-FP16-NEXT: orrs r12, r2, r3
; CHECK-FP16-NEXT: moveq r0, r5
; CHECK-FP16-NEXT: cmn r1, #-2147483647
; CHECK-FP16-NEXT: mvn r5, #-2147483648
; CHECK-FP16-NEXT: movlo r5, r1
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movmi r7, r1
; CHECK-FP16-NEXT: cmp r12, #0
; CHECK-FP16-NEXT: moveq r7, r5
; CHECK-FP16-NEXT: cmp r7, #-2147483648
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
; CHECK-FP16-NEXT: movhi r1, r0
; CHECK-FP16-NEXT: mov r12, #0
; CHECK-FP16-NEXT: moveq r1, r0
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: mvn r6, #0
; CHECK-FP16-NEXT: movmi r6, r5
; CHECK-FP16-NEXT: cmn r11, #-2147483647
; CHECK-FP16-NEXT: movlo r4, r5
; CHECK-FP16-NEXT: moveq r4, r5
; CHECK-FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
; CHECK-FP16-NEXT: movne r4, r6
; CHECK-FP16-NEXT: cmp r8, #-2147483648
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: movhi r6, r4
; CHECK-FP16-NEXT: moveq r6, r4
; CHECK-FP16-NEXT: cmn r10, #1
; CHECK-FP16-NEXT: movle r4, r12
; CHECK-FP16-NEXT: cmn r5, #1
; CHECK-FP16-NEXT: moveq r4, r6
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: vmov.32 d1[0], r4
; CHECK-FP16-NEXT: movmi r6, r3
; CHECK-FP16-NEXT: cmn r6, #1
; CHECK-FP16-NEXT: movle r0, r12
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: andne r3, r2, r3, asr #31
; CHECK-FP16-NEXT: and r2, r3, r6
; CHECK-FP16-NEXT: cmn r2, #1
; CHECK-FP16-NEXT: moveq r0, r1
; CHECK-FP16-NEXT: cmn r6, #1
; CHECK-FP16-NEXT: mov r1, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: movgt r1, r7
; CHECK-FP16-NEXT: cmp r7, #-2147483648
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d1[1], r9
; CHECK-FP16-NEXT: movls r7, r0
; CHECK-FP16-NEXT: cmn r2, #1
; CHECK-FP16-NEXT: movne r7, r1
; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: add sp, sp, #16
; CHECK-FP16-NEXT: vpop {d8}
; CHECK-FP16-NEXT: add sp, sp, #4
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
; Convert each lane to a signed 128-bit integer.
%conv = fptosi <2 x half> %x to <2 x i128>
; Upper clamp. 9223372036854775807 is 2^63 - 1, the largest signed i64.
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
; Lower clamp. -9223372036854775808 is -2^63, the smallest signed i64.
%spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
; Every clamped lane now fits in a signed i64, so the truncation is lossless.
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; Unsigned saturating conversion of <2 x half> to <2 x i64>, expressed with a
; umin intrinsic. Each lane is converted to an unsigned i128, limited from above
; and truncated to i64.
; The two RUN configurations produce different code, so each has its own set of
; assertions below.
;  - NEON-only run (armv7a, +neon) expects both half lanes to be widened with
;    __aeabi_h2f and then converted with __fixunssfti.
;  - fullfp16 run (armv8a, +neon,+fullfp16) expects each half lane to be passed
;    straight to __fixunshfti.
; These assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the update script rather than editing by hand.
define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: mov r7, r1
; CHECK-NEON-NEXT: eor r1, r2, #1
; CHECK-NEON-NEXT: subs r2, r2, #1
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: orr r1, r1, r3
; CHECK-NEON-NEXT: movwlo r6, #1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: moveq r7, r6
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: vmov s0, r5
; CHECK-NEON-NEXT: moveq r7, r1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movne r6, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r6, r1
; CHECK-NEON-NEXT: bl __fixunssfti
; CHECK-NEON-NEXT: eor r4, r2, #1
; CHECK-NEON-NEXT: subs r2, r2, #1
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: orr r4, r4, r3
; CHECK-NEON-NEXT: movwlo r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: moveq r0, r5
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: moveq r0, r4
; CHECK-NEON-NEXT: vmov.32 d1[0], r6
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: vmov.32 d0[0], r0
; CHECK-NEON-NEXT: movne r5, r1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r7
; CHECK-NEON-NEXT: moveq r5, r4
; CHECK-NEON-NEXT: vmov.32 d0[1], r5
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: mov r7, r1
; CHECK-FP16-NEXT: eor r1, r2, #1
; CHECK-FP16-NEXT: subs r2, r2, #1
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: sbcs r2, r3, #0
; CHECK-FP16-NEXT: orr r1, r1, r3
; CHECK-FP16-NEXT: movwlo r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: moveq r7, r6
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: vmov s0, r5
; CHECK-FP16-NEXT: moveq r7, r1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: movne r6, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r6, r1
; CHECK-FP16-NEXT: bl __fixunshfti
; CHECK-FP16-NEXT: eor r4, r2, #1
; CHECK-FP16-NEXT: subs r2, r2, #1
; CHECK-FP16-NEXT: sbcs r2, r3, #0
; CHECK-FP16-NEXT: orr r4, r4, r3
; CHECK-FP16-NEXT: movwlo r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: moveq r0, r5
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: moveq r0, r4
; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: vmov.32 d0[0], r0
; CHECK-FP16-NEXT: movne r5, r1
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r7
; CHECK-FP16-NEXT: moveq r5, r4
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
; Convert each lane to an unsigned 128-bit integer.
%conv = fptoui <2 x half> %x to <2 x i128>
; Upper limit only. 18446744073709551616 is 2^64.
; NOTE(review) - 2^64 does not fit in 64 bits, so a lane limited to this bound
; truncates to 0 below. A saturation to the largest unsigned i64 would use
; 2^64 - 1. Confirm the bound is intentional before changing it, because the
; assertions above were generated from the current constant.
%spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
; Keep the low 64 bits of each lane.
%conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
ret <2 x i64> %conv6
}
define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
; CHECK-NEON-NEXT: .vsave {d8}
; CHECK-NEON-NEXT: vpush {d8}
; CHECK-NEON-NEXT: vmov r0, s0
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: mov r8, r0
; CHECK-NEON-NEXT: eor r0, r2, #1
; CHECK-NEON-NEXT: mov r5, r2
; CHECK-NEON-NEXT: subs r2, r2, #1
; CHECK-NEON-NEXT: sbcs r2, r3, #0
; CHECK-NEON-NEXT: mov r4, #0
; CHECK-NEON-NEXT: movwlt r4, #1
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: orr r0, r0, r3
; CHECK-NEON-NEXT: moveq r8, r4
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mov r10, #1
; CHECK-NEON-NEXT: moveq r8, r0
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: movne r4, r1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: moveq r4, r0
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: mov r7, r4
; CHECK-NEON-NEXT: mov r0, #1
; CHECK-NEON-NEXT: movne r7, r8
; CHECK-NEON-NEXT: mov r6, r3
; CHECK-NEON-NEXT: moveq r7, r8
; CHECK-NEON-NEXT: cmp r5, #1
; CHECK-NEON-NEXT: movlo r0, r5
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movpl r5, r10
; CHECK-NEON-NEXT: mov r9, #0
; CHECK-NEON-NEXT: moveq r5, r0
; CHECK-NEON-NEXT: movpl r6, r9
; CHECK-NEON-NEXT: rsbs r0, r5, #0
; CHECK-NEON-NEXT: mov r11, #0
; CHECK-NEON-NEXT: rscs r0, r6, #0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: movwlt r11, #1
; CHECK-NEON-NEXT: cmp r11, #0
; CHECK-NEON-NEXT: moveq r8, r11
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: vmov s0, r0
; CHECK-NEON-NEXT: orrs r5, r5, r6
; CHECK-NEON-NEXT: moveq r8, r7
; CHECK-NEON-NEXT: bl __fixsfti
; CHECK-NEON-NEXT: subs r6, r2, #1
; CHECK-NEON-NEXT: eor r7, r2, #1
; CHECK-NEON-NEXT: sbcs r6, r3, #0
; CHECK-NEON-NEXT: orr r7, r7, r3
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: movwlt r6, #1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: moveq r0, r6
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: moveq r0, r7
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: movne r6, r1
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: moveq r6, r7
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: mov r1, r6
; CHECK-NEON-NEXT: mov r7, #1
; CHECK-NEON-NEXT: movne r1, r0
; CHECK-NEON-NEXT: moveq r1, r0
; CHECK-NEON-NEXT: cmp r2, #1
; CHECK-NEON-NEXT: movlo r7, r2
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: movmi r10, r2
; CHECK-NEON-NEXT: movpl r3, r9
; CHECK-NEON-NEXT: moveq r10, r7
; CHECK-NEON-NEXT: rsbs r2, r10, #0
; CHECK-NEON-NEXT: rscs r2, r3, #0
; CHECK-NEON-NEXT: movwlt r9, #1
; CHECK-NEON-NEXT: cmp r9, #0
; CHECK-NEON-NEXT: moveq r0, r9
; CHECK-NEON-NEXT: orrs r2, r10, r3
; CHECK-NEON-NEXT: moveq r0, r1
; CHECK-NEON-NEXT: cmp r9, #0
; CHECK-NEON-NEXT: movne r9, r6
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: vmov.32 d1[0], r0
; CHECK-NEON-NEXT: moveq r9, r6
; CHECK-NEON-NEXT: cmp r11, #0
; CHECK-NEON-NEXT: vmov.32 d0[0], r8
; CHECK-NEON-NEXT: movne r11, r4
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: vmov.32 d1[1], r9
; CHECK-NEON-NEXT: moveq r11, r4
; CHECK-NEON-NEXT: vmov.32 d0[1], r11
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FP16-NEXT: .pad #4
; CHECK-FP16-NEXT: sub sp, sp, #4
; CHECK-FP16-NEXT: .vsave {d8}
; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[0]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: subs r7, r2, #1
; CHECK-FP16-NEXT: mov r10, r0
; CHECK-FP16-NEXT: eor r0, r2, #1
; CHECK-FP16-NEXT: sbcs r7, r3, #0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: orr r0, r0, r3
; CHECK-FP16-NEXT: movwlt r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: moveq r10, r5
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: moveq r10, r0
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: movne r5, r1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: moveq r5, r0
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: mov r0, r5
; CHECK-FP16-NEXT: mov r1, #1
; CHECK-FP16-NEXT: movne r0, r10
; CHECK-FP16-NEXT: mov r8, #1
; CHECK-FP16-NEXT: moveq r0, r10
; CHECK-FP16-NEXT: cmp r2, #1
; CHECK-FP16-NEXT: movlo r1, r2
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movpl r2, r8
; CHECK-FP16-NEXT: mov r11, #0
; CHECK-FP16-NEXT: moveq r2, r1
; CHECK-FP16-NEXT: movpl r3, r11
; CHECK-FP16-NEXT: rsbs r1, r2, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: rscs r1, r3, #0
; CHECK-FP16-NEXT: vmov.u16 r1, d8[1]
; CHECK-FP16-NEXT: movwlt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: moveq r10, r7
; CHECK-FP16-NEXT: orrs r9, r2, r3
; CHECK-FP16-NEXT: moveq r10, r0
; CHECK-FP16-NEXT: vmov s0, r1
; CHECK-FP16-NEXT: bl __fixhfti
; CHECK-FP16-NEXT: eor r4, r2, #1
; CHECK-FP16-NEXT: orr r6, r4, r3
; CHECK-FP16-NEXT: subs r4, r2, #1
; CHECK-FP16-NEXT: sbcs r4, r3, #0
; CHECK-FP16-NEXT: mov r4, #0
; CHECK-FP16-NEXT: movwlt r4, #1
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: moveq r0, r4
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: moveq r0, r6
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: movne r4, r1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: moveq r4, r6
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: mov r1, r4
; CHECK-FP16-NEXT: mov r6, #1
; CHECK-FP16-NEXT: movne r1, r0
; CHECK-FP16-NEXT: moveq r1, r0
; CHECK-FP16-NEXT: cmp r2, #1
; CHECK-FP16-NEXT: movlo r6, r2
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movmi r8, r2
; CHECK-FP16-NEXT: movpl r3, r11
; CHECK-FP16-NEXT: moveq r8, r6
; CHECK-FP16-NEXT: rsbs r2, r8, #0
; CHECK-FP16-NEXT: rscs r2, r3, #0
; CHECK-FP16-NEXT: movwlt r11, #1
; CHECK-FP16-NEXT: cmp r11, #0
; CHECK-FP16-NEXT: moveq r0, r11
; CHECK-FP16-NEXT: orrs r2, r8, r3
; CHECK-FP16-NEXT: moveq r0, r1
; CHECK-FP16-NEXT: cmp r11, #0
; CHECK-FP16-NEXT: movne r11, r4
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: vmov.32 d1[0], r0
; CHECK-FP16-NEXT: moveq r11, r4
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: vmov.32 d0[0], r10
; CHECK-FP16-NEXT: movne r7, r5
; CHECK-FP16-NEXT: cmp r9, #0
; CHECK-FP16-NEXT: vmov.32 d1[1], r11
; CHECK-FP16-NEXT: moveq r7, r5
; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: vpop {d8}
; CHECK-FP16-NEXT: add sp, sp, #4
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
%spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
%spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
%conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
ret <2 x i64> %conv6
}
; Declarations of the llvm.smin / llvm.smax / llvm.umin vector intrinsics.
; Every declaration has the same shape: two operands of one vector type, and a
; result of that same vector type. Per the LLVM Language Reference these are
; the element-wise signed minimum, signed maximum and unsigned minimum.
;
; Of these, the v2i128 smin/smax pair is called by ustest_f16i64_mm directly
; above. The other declarations are presumably used by tests earlier in the
; file -- TODO confirm before pruning any of them.

; i32 elements: 2-, 4- and 8-lane vectors.
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
; i64 elements: 2- and 4-lane vectors.
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
; i128 elements: 2-lane vectors (the wide intermediate type that the visible
; test clamps in before truncating to <2 x i64>).
declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
declare <2 x i128> @llvm.umin.v2i128(<2 x i128>, <2 x i128>)