This reverts commit 9c319d5bb4.
Some issues were discovered with the bootstrap builds, and they
appear to have been caused by this commit. I'm reverting it while I investigate.
2162 lines
85 KiB
LLVM
2162 lines
85 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
|
|
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
|
|
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
|
|
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
|
|
|
|
; Scalar fp128 ordered-less-than compare feeding a select between two fp128
; operands. Every run line expects the compare to be lowered to a __lttf2 call.
define fp128 @f128_fp128(fp128 %a, fp128 %b, fp128 %d, fp128 %e) {
; CHECK-SD-LABEL: f128_fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #48
; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: stp q2, q3, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: b.ge .LBB0_2
; CHECK-SD-NEXT: // %bb.1: // %entry
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: .LBB0_2: // %entry
; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: f128_fp128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #48
; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: stp q3, q2, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov d0, v2.d[1]
; CHECK-GI-NEXT: mov d1, v3.d[1]
; CHECK-GI-NEXT: fcsel d2, d2, d3, lt
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: fcsel d1, d0, d1, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: add sp, sp, #48
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt fp128 %a, %b
  %sel = select i1 %cmp, fp128 %d, fp128 %e
  ret fp128 %sel
}
|
|
|
|
; Scalar fp128 ordered-less-than compare feeding a select between two i128
; operands. The default runs expect two __lttf2 calls (one ahead of each csel);
; the -global-isel runs expect a single call shared by both csel instructions.
define i128 @f128_i128(fp128 %a, fp128 %b, i128 %d, i128 %e) {
; CHECK-SD-LABEL: f128_i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #80
; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: mov x19, x3
; CHECK-SD-NEXT: mov x20, x2
; CHECK-SD-NEXT: mov x21, x1
; CHECK-SD-NEXT: mov x22, x0
; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csel x20, x22, x20, lt
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: mov w8, w0
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT: cmp w8, #0
; CHECK-SD-NEXT: csel x1, x21, x19, lt
; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: f128_i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w30, -48
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov x20, x1
; CHECK-GI-NEXT: mov x21, x2
; CHECK-GI-NEXT: mov x22, x3
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x0, x19, x21, lt
; CHECK-GI-NEXT: csel x1, x20, x22, lt
; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt fp128 %a, %b
  %sel = select i1 %cmp, i128 %d, i128 %e
  ret i128 %sel
}
|
|
|
|
; Scalar fp128 ordered-less-than compare feeding a select between two doubles.
; The select operands are expected to be parked in d8/d9 across the __lttf2
; call and then combined with fcsel.
define double @f128_double(fp128 %a, fp128 %b, double %d, double %e) {
; CHECK-SD-LABEL: f128_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: .cfi_offset b8, -24
; CHECK-SD-NEXT: .cfi_offset b9, -32
; CHECK-SD-NEXT: fmov d8, d3
; CHECK-SD-NEXT: fmov d9, d2
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: fcsel d0, d9, d8, lt
; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: f128_double:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: .cfi_offset b8, -24
; CHECK-GI-NEXT: .cfi_offset b9, -32
; CHECK-GI-NEXT: fmov d8, d2
; CHECK-GI-NEXT: fmov d9, d3
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT: fcsel d0, d8, d9, lt
; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt fp128 %a, %b
  %sel = select i1 %cmp, double %d, double %e
  ret double %sel
}
|
|
|
|
; Scalar fp128 ordered-less-than compare feeding a select between two floats.
; Same shape as the double variant, with the fcsel expected on s registers.
define float @f128_float(fp128 %a, fp128 %b, float %d, float %e) {
; CHECK-SD-LABEL: f128_float:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: .cfi_offset b8, -24
; CHECK-SD-NEXT: .cfi_offset b9, -32
; CHECK-SD-NEXT: fmov s8, s3
; CHECK-SD-NEXT: fmov s9, s2
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: fcsel s0, s9, s8, lt
; CHECK-SD-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: f128_float:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: .cfi_offset b8, -24
; CHECK-GI-NEXT: .cfi_offset b9, -32
; CHECK-GI-NEXT: fmov s8, s2
; CHECK-GI-NEXT: fmov s9, s3
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT: fcsel s0, s8, s9, lt
; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt fp128 %a, %b
  %sel = select i1 %cmp, float %d, float %e
  ret float %sel
}
|
|
|
|
; Scalar fp128 ordered-less-than compare feeding a select between two i32
; operands. The integers are expected to be kept in w19/w20 across the
; __lttf2 call and combined with csel.
define i32 @f128_i32(fp128 %a, fp128 %b, i32 %d, i32 %e) {
; CHECK-SD-LABEL: f128_i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: mov w19, w1
; CHECK-SD-NEXT: mov w20, w0
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csel w0, w20, w19, lt
; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: f128_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w30, -32
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: mov w20, w1
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w0, w19, w20, lt
; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt fp128 %a, %b
  %sel = select i1 %cmp, i32 %d, i32 %e
  ret i32 %sel
}
|
|
|
|
; Scalar fp128 ordered-less-than compare feeding a select between two halfs.
; Without +fullfp16 the default runs expect an fcsel on s registers, with
; +fullfp16 an fcsel on h registers; the -global-isel runs expect the select
; to go through w registers with an integer csel.
define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) {
; CHECK-SD-NOFP16-LABEL: f128_half:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-SD-NOFP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NOFP16-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NOFP16-NEXT: .cfi_offset w30, -16
; CHECK-SD-NOFP16-NEXT: .cfi_offset b8, -24
; CHECK-SD-NOFP16-NEXT: .cfi_offset b9, -32
; CHECK-SD-NOFP16-NEXT: fmov s8, s3
; CHECK-SD-NOFP16-NEXT: fmov s9, s2
; CHECK-SD-NOFP16-NEXT: bl __lttf2
; CHECK-SD-NOFP16-NEXT: cmp w0, #0
; CHECK-SD-NOFP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NOFP16-NEXT: fcsel s0, s9, s8, lt
; CHECK-SD-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-SD-NOFP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: f128_half:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-SD-FP16-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-FP16-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-FP16-NEXT: .cfi_offset w30, -16
; CHECK-SD-FP16-NEXT: .cfi_offset b8, -24
; CHECK-SD-FP16-NEXT: .cfi_offset b9, -32
; CHECK-SD-FP16-NEXT: fmov s8, s3
; CHECK-SD-FP16-NEXT: fmov s9, s2
; CHECK-SD-FP16-NEXT: bl __lttf2
; CHECK-SD-FP16-NEXT: cmp w0, #0
; CHECK-SD-FP16-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-FP16-NEXT: fcsel h0, h9, h8, lt
; CHECK-SD-FP16-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-LABEL: f128_half:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: .cfi_offset b8, -24
; CHECK-GI-NEXT: .cfi_offset b9, -32
; CHECK-GI-NEXT: fmov s8, s2
; CHECK-GI-NEXT: fmov s9, s3
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: fmov w8, s8
; CHECK-GI-NEXT: fmov w9, s9
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT: csel w8, w8, w9, lt
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt fp128 %a, %b
  %sel = select i1 %cmp, half %d, half %e
  ret half %sel
}
|
|
|
|
; Scalar double ordered-less-than compare feeding a select between two doubles.
; All run lines share one expectation: fcmp followed by fcsel on the mi condition.
define double @f64_double(double %a, double %b, double %d, double %e) {
; CHECK-LABEL: f64_double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmp d0, d1
; CHECK-NEXT: fcsel d0, d2, d3, mi
; CHECK-NEXT: ret
entry:
  %cmp = fcmp olt double %a, %b
  %sel = select i1 %cmp, double %d, double %e
  ret double %sel
}
|
|
|
|
; Scalar double ordered-less-than compare feeding a select between two i32
; operands. All run lines expect fcmp followed by an integer csel on mi.
define i32 @f64_i32(double %a, double %b, i32 %d, i32 %e) {
; CHECK-LABEL: f64_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmp d0, d1
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
entry:
  %cmp = fcmp olt double %a, %b
  %sel = select i1 %cmp, i32 %d, i32 %e
  ret i32 %sel
}
|
|
|
|
; Scalar float ordered-less-than compare feeding a select between two floats.
; All run lines expect fcmp followed by fcsel on the mi condition.
define float @f32_float(float %a, float %b, float %d, float %e) {
; CHECK-LABEL: f32_float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s2, s3, mi
; CHECK-NEXT: ret
entry:
  %cmp = fcmp olt float %a, %b
  %sel = select i1 %cmp, float %d, float %e
  ret float %sel
}
|
|
|
|
; Scalar float ordered-less-than compare feeding a select between two i32
; operands. All run lines expect fcmp followed by an integer csel on mi.
define i32 @f32_i32(float %a, float %b, i32 %d, i32 %e) {
; CHECK-LABEL: f32_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
entry:
  %cmp = fcmp olt float %a, %b
  %sel = select i1 %cmp, i32 %d, i32 %e
  ret i32 %sel
}
|
|
|
|
; Scalar half ordered-less-than compare feeding a select between two halfs.
; Runs without +fullfp16 expect both compare operands to be converted to
; single precision with fcvt first; runs with +fullfp16 expect a direct
; fcmp on h registers.
define half @f16_half(half %a, half %b, half %d, half %e) {
; CHECK-SD-NOFP16-LABEL: f16_half:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: // kill: def $h3 killed $h3 def $s3
; CHECK-SD-NOFP16-NEXT: // kill: def $h2 killed $h2 def $s2
; CHECK-SD-NOFP16-NEXT: fcmp s0, s1
; CHECK-SD-NOFP16-NEXT: fcsel s0, s2, s3, mi
; CHECK-SD-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: f16_half:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmp h0, h1
; CHECK-SD-FP16-NEXT: fcsel h0, h2, h3, mi
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: // kill: def $h2 killed $h2 def $s2
; CHECK-GI-NOFP16-NEXT: // kill: def $h3 killed $h3 def $s3
; CHECK-GI-NOFP16-NEXT: fmov w8, s2
; CHECK-GI-NOFP16-NEXT: fmov w9, s3
; CHECK-GI-NOFP16-NEXT: fcmp s0, s1
; CHECK-GI-NOFP16-NEXT: csel w8, w8, w9, mi
; CHECK-GI-NOFP16-NEXT: fmov s0, w8
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: f16_half:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $h2 killed $h2 def $s2
; CHECK-GI-FP16-NEXT: // kill: def $h3 killed $h3 def $s3
; CHECK-GI-FP16-NEXT: fcmp h0, h1
; CHECK-GI-FP16-NEXT: fmov w8, s2
; CHECK-GI-FP16-NEXT: fmov w9, s3
; CHECK-GI-FP16-NEXT: csel w8, w8, w9, mi
; CHECK-GI-FP16-NEXT: fmov s0, w8
; CHECK-GI-FP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-FP16-NEXT: ret
entry:
  %cmp = fcmp olt half %a, %b
  %sel = select i1 %cmp, half %d, half %e
  ret half %sel
}
|
|
|
|
; Scalar half ordered-less-than compare feeding a select between two i32
; operands. Runs without +fullfp16 expect fcvt to single precision ahead of
; the fcmp; runs with +fullfp16 expect fcmp directly on h registers.
define i32 @f16_i32(half %a, half %b, i32 %d, i32 %e) {
; CHECK-SD-NOFP16-LABEL: f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fcmp s0, s1
; CHECK-SD-NOFP16-NEXT: csel w0, w0, w1, mi
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: f16_i32:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmp h0, h1
; CHECK-SD-FP16-NEXT: csel w0, w0, w1, mi
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: f16_i32:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fcmp s0, s1
; CHECK-GI-NOFP16-NEXT: csel w0, w0, w1, mi
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: f16_i32:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmp h0, h1
; CHECK-GI-FP16-NEXT: csel w0, w0, w1, mi
; CHECK-GI-FP16-NEXT: ret
entry:
  %cmp = fcmp olt half %a, %b
  %sel = select i1 %cmp, i32 %d, i32 %e
  ret i32 %sel
}
|
|
|
|
; <2 x fp128> ordered-less-than compare feeding a lane-wise select between two
; <2 x fp128> operands. Every run line expects one __lttf2 call per lane.
define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d, <2 x fp128> %e) {
; CHECK-SD-LABEL: v2f128_fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #112
; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: b.ge .LBB12_2
; CHECK-SD-NEXT: // %bb.1: // %entry
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: .LBB12_2: // %entry
; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: b.ge .LBB12_4
; CHECK-SD-NEXT: // %bb.3: // %entry
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: .LBB12_4: // %entry
; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #112
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v2f128_fp128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #112
; CHECK-GI-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: stp q6, q4, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT: stp q7, q5, [sp, #64] // 32-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q3, q2, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w19, #0
; CHECK-GI-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov d0, v2.d[1]
; CHECK-GI-NEXT: mov d1, v3.d[1]
; CHECK-GI-NEXT: fcsel d2, d2, d3, lt
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: fcsel d3, d0, d1, lt
; CHECK-GI-NEXT: ldp q5, q0, [sp, #64] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mov d4, v5.d[1]
; CHECK-GI-NEXT: fcsel d0, d0, d5, lt
; CHECK-GI-NEXT: fmov x9, d0
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: fmov x8, d3
; CHECK-GI-NEXT: fcsel d2, d1, d4, lt
; CHECK-GI-NEXT: mov v1.d[0], x9
; CHECK-GI-NEXT: fmov x9, d2
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: add sp, sp, #112
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt <2 x fp128> %a, %b
  %sel = select <2 x i1> %cmp, <2 x fp128> %d, <2 x fp128> %e
  ret <2 x fp128> %sel
}
|
|
|
|
; <3 x fp128> ordered-less-than compare feeding a lane-wise select between two
; <3 x fp128> operands. Every run line expects three __lttf2 calls, and some
; of the select operands are expected to be loaded from the incoming stack
; area above the local frame.
define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, <3 x fp128> %e) {
; CHECK-SD-LABEL: v3f128_fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #112
; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: b.lt .LBB13_2
; CHECK-SD-NEXT: // %bb.1:
; CHECK-SD-NEXT: ldr q0, [sp, #128]
; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: .LBB13_2: // %entry
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: b.lt .LBB13_4
; CHECK-SD-NEXT: // %bb.3:
; CHECK-SD-NEXT: ldr q0, [sp, #144]
; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: .LBB13_4: // %entry
; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: add x8, sp, #160
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: add x9, sp, #112
; CHECK-SD-NEXT: csel x8, x9, x8, lt
; CHECK-SD-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload
; CHECK-SD-NEXT: ldr q2, [x8]
; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #112
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3f128_fp128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #192
; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 192
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w30, -32
; CHECK-GI-NEXT: stp q4, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v3.16b
; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #192]
; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #208]
; CHECK-GI-NEXT: stp q2, q6, [sp, #64] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #224]
; CHECK-GI-NEXT: stp q7, q2, [sp, #96] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #240]
; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: mov w20, w0
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q5, q4, [sp, #64] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w19, #0
; CHECK-GI-NEXT: ldp q7, q6, [sp, #96] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov d0, v4.d[1]
; CHECK-GI-NEXT: mov d1, v5.d[1]
; CHECK-GI-NEXT: fcsel d4, d4, d5, lt
; CHECK-GI-NEXT: mov d2, v7.d[1]
; CHECK-GI-NEXT: mov d3, v6.d[1]
; CHECK-GI-NEXT: fmov x8, d4
; CHECK-GI-NEXT: fcsel d5, d0, d1, lt
; CHECK-GI-NEXT: cmp w20, #0
; CHECK-GI-NEXT: fcsel d1, d7, d6, lt
; CHECK-GI-NEXT: ldp q7, q0, [sp, #128] // 32-byte Folded Reload
; CHECK-GI-NEXT: fcsel d3, d2, d3, lt
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov d2, v0.d[1]
; CHECK-GI-NEXT: mov d6, v7.d[1]
; CHECK-GI-NEXT: fcsel d7, d0, d7, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: fmov x9, d7
; CHECK-GI-NEXT: fcsel d4, d2, d6, lt
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: fmov x8, d5
; CHECK-GI-NEXT: mov v2.d[0], x9
; CHECK-GI-NEXT: fmov x9, d3
; CHECK-GI-NEXT: fmov x10, d4
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: mov v2.d[1], x10
; CHECK-GI-NEXT: add sp, sp, #192
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt <3 x fp128> %a, %b
  %sel = select <3 x i1> %cmp, <3 x fp128> %d, <3 x fp128> %e
  ret <3 x fp128> %sel
}
|
|
|
|
|
|
; <2 x fp128> ordered-less-than compare feeding a lane-wise select between two
; <2 x double> operands. Every run line expects one __lttf2 call per lane, a
; vector mask built from the two results, and a final bsl.
define <2 x double> @v2f128_double(<2 x fp128> %a, <2 x fp128> %b, <2 x double> %d, <2 x double> %e) {
; CHECK-SD-LABEL: v2f128_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #96
; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: mov v0.16b, v1.16b
; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: stp q4, q5, [sp, #48] // 32-byte Folded Spill
; CHECK-SD-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload
; CHECK-SD-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT: add sp, sp, #96
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v2f128_double:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #80
; CHECK-GI-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: stp q5, q4, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w19, lt
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: mov v0.d[0], x19
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w8, lt
; CHECK-GI-NEXT: ldp q2, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
; CHECK-GI-NEXT: bsl v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: add sp, sp, #80
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt <2 x fp128> %a, %b
  %sel = select <2 x i1> %cmp, <2 x double> %d, <2 x double> %e
  ret <2 x double> %sel
}
|
|
|
|
; <3 x fp128> ordered-less-than compare feeding a lane-wise select between two
; <3 x double> operands. Every run line expects three __lttf2 calls; the
; results are turned into lane masks and applied with bitwise select
; sequences.
define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double> %d, <3 x double> %e) {
; CHECK-SD-LABEL: v3f128_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #160
; CHECK-SD-NEXT: str x30, [sp, #144] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 160
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: stp q2, q5, [sp, #112] // 32-byte Folded Spill
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: ldr d5, [sp, #184]
; CHECK-SD-NEXT: str q3, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp d3, d2, [sp, #168]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: mov v0.16b, v1.16b
; CHECK-SD-NEXT: mov v1.16b, v4.16b
; CHECK-SD-NEXT: str q5, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr d5, [sp, #160]
; CHECK-SD-NEXT: mov v3.d[1], v2.d[0]
; CHECK-SD-NEXT: str q5, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp q6, q3, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: mov v1.d[1], v0.d[0]
; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl __lttf2
; CHECK-SD-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldp q2, q4, [sp, #64] // 32-byte Folded Reload
; CHECK-SD-NEXT: cset w8, lt
; CHECK-SD-NEXT: sbfx x8, x8, #0, #1
; CHECK-SD-NEXT: ldr q3, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: fmov d2, x8
; CHECK-SD-NEXT: bsl v2.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: add sp, sp, #160
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3f128_double:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #176
; CHECK-GI-NEXT: str x30, [sp, #128] // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 176
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w30, -48
; CHECK-GI-NEXT: stp q4, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v3.16b
; CHECK-GI-NEXT: ldr x19, [sp, #176]
; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr d2, [sp, #184]
; CHECK-GI-NEXT: ldr x20, [sp, #200]
; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: str q2, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr d2, [sp, #192]
; CHECK-GI-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w21, lt
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w22, lt
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: sbfx x8, x21, #0, #1
; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: sbfx x8, x22, #0, #1
; CHECK-GI-NEXT: mov v2.d[1], v3.d[0]
; CHECK-GI-NEXT: ldp q4, q3, [sp, #96] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v1.d[1], x8
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: cset w8, lt
; CHECK-GI-NEXT: sbfx x8, x8, #0, #1
; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: bic v0.16b, v3.16b, v0.16b
; CHECK-GI-NEXT: and x9, x19, x8
; CHECK-GI-NEXT: bic x8, x20, x8
; CHECK-GI-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
; CHECK-GI-NEXT: orr x8, x9, x8
; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: fmov d2, x8
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: add sp, sp, #176
; CHECK-GI-NEXT: ret
entry:
  %cmp = fcmp olt <3 x fp128> %a, %b
  %sel = select <3 x i1> %cmp, <3 x double> %d, <3 x double> %e
  ret <3 x double> %sel
}
|
|
|
|
; Lane-wise select on <2 x double>: each result lane is %d where %a < %b
; (ordered compare, so false when either operand is NaN) and %e otherwise.
; One expected sequence is shared by every RUN configuration: a vector
; compare producing the mask, followed by a bitwise select.
define <2 x double> @v2f64_double(<2 x double> %a, <2 x double> %b, <2 x double> %d, <2 x double> %e) {
; CHECK-LABEL: v2f64_double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
entry:
  %c = fcmp olt <2 x double> %a, %b
  %s = select <2 x i1> %c, <2 x double> %d, <2 x double> %e
  ret <2 x double> %s
}
|
|
|
|
; Lane-wise select on <3 x double> (odd lane count): each result lane is %d
; where %a < %b (ordered compare) and %e otherwise.
; SelectionDAG and GlobalISel have separate expectations. Both pack lanes 0-1
; into one vector compare and handle lane 2 with a separate instruction, and
; both load some operand lanes from the stack.
define <3 x double> @v3f64_double(<3 x double> %a, <3 x double> %b, <3 x double> %d, <3 x double> %e) {
; CHECK-SD-LABEL: v3f64_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: ldr d16, [sp, #24]
; CHECK-SD-NEXT: ldr d17, [sp]
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: ldp d1, d4, [sp, #8]
; CHECK-SD-NEXT: fcmgt v2.2d, v5.2d, v2.2d
; CHECK-SD-NEXT: mov v1.d[1], v4.d[0]
; CHECK-SD-NEXT: fcmgt v0.2d, v3.2d, v0.2d
; CHECK-SD-NEXT: bsl v2.16b, v17.16b, v16.16b
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-SD-NEXT: bsl v0.16b, v6.16b, v1.16b
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3f64_double:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-GI-NEXT: fcmp d2, d5
; CHECK-GI-NEXT: ldr x8, [sp]
; CHECK-GI-NEXT: ldr x10, [sp, #24]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
; CHECK-GI-NEXT: ldp d1, d4, [sp, #8]
; CHECK-GI-NEXT: cset w9, mi
; CHECK-GI-NEXT: sbfx x9, x9, #0, #1
; CHECK-GI-NEXT: fcmgt v0.2d, v3.2d, v0.2d
; CHECK-GI-NEXT: mov v1.d[1], v4.d[0]
; CHECK-GI-NEXT: and x8, x8, x9
; CHECK-GI-NEXT: bic x9, x10, x9
; CHECK-GI-NEXT: orr x8, x8, x9
; CHECK-GI-NEXT: fmov d2, x8
; CHECK-GI-NEXT: bsl v0.16b, v6.16b, v1.16b
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <3 x double> %a, %b
  %s = select <3 x i1> %c, <3 x double> %d, <3 x double> %e
  ret <3 x double> %s
}
|
|
|
|
; Lane-wise select on <4 x double>: each result lane is %d where %a < %b
; (ordered compare) and %e otherwise. Every operand spans two 128-bit
; registers, so the expected code is two compare/select pairs.
; SelectionDAG and GlobalISel expectations contain the same four
; instructions and differ only in their order.
define <4 x double> @v4f64_double(<4 x double> %a, <4 x double> %b, <4 x double> %d, <4 x double> %e) {
; CHECK-SD-LABEL: v4f64_double:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmgt v1.2d, v3.2d, v1.2d
; CHECK-SD-NEXT: fcmgt v0.2d, v2.2d, v0.2d
; CHECK-SD-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v4f64_double:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: fcmgt v1.2d, v3.2d, v1.2d
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <4 x double> %a, %b
  %s = select <4 x i1> %c, <4 x double> %d, <4 x double> %e
  ret <4 x double> %s
}
|
|
|
|
; Compare on <2 x double>, select on <2 x i32>: each result lane is %d where
; %a < %b (ordered compare) and %e otherwise. The mask is wider than the
; selected elements, so the expected code narrows it (xtn) before the
; bitwise select. One expected sequence is shared by every RUN configuration.
define <2 x i32> @v2f64_i32(<2 x double> %a, <2 x double> %b, <2 x i32> %d, <2 x i32> %e) {
; CHECK-LABEL: v2f64_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
entry:
  %c = fcmp olt <2 x double> %a, %b
  %s = select <2 x i1> %c, <2 x i32> %d, <2 x i32> %e
  ret <2 x i32> %s
}
|
|
|
|
; Compare on <3 x double>, select on <3 x i32> (odd lane count): each result
; lane is %d where %a < %b (ordered compare) and %e otherwise.
; SelectionDAG and GlobalISel have separate expectations. The SelectionDAG
; one narrows two vector masks with uzp1 and uses a single bitwise select.
; The GlobalISel one compares lane 2 as a scalar, builds shift-amount (31)
; and all-ones constants lane by lane, and selects with and/eor/orr.
define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: v3f64_i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: fcmgt v1.2d, v5.2d, v2.2d
; CHECK-SD-NEXT: fcmgt v0.2d, v3.2d, v0.2d
; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: bsl v0.16b, v6.16b, v7.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3f64_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: fcmp d2, d5
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: cset w9, mi
; CHECK-GI-NEXT: mov v2.s[0], w9
; CHECK-GI-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-GI-NEXT: fcmgt v0.2d, v3.2d, v0.2d
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: mov v3.s[0], w9
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: mov v1.s[2], w8
; CHECK-GI-NEXT: mov v3.s[1], w9
; CHECK-GI-NEXT: mov v0.d[1], v2.d[0]
; CHECK-GI-NEXT: mov v3.s[2], w9
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: neg v1.4s, v1.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v3.16b
; CHECK-GI-NEXT: and v0.16b, v6.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v7.16b, v1.16b
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <3 x double> %a, %b
  %s = select <3 x i1> %c, <3 x i32> %d, <3 x i32> %e
  ret <3 x i32> %s
}
|
|
|
|
; Compare on <4 x double>, select on <4 x i32>: each result lane is %d where
; %a < %b (ordered compare) and %e otherwise. Two 64-bit-lane masks are
; narrowed into one 32-bit-lane mask (uzp1) before the bitwise select.
; SelectionDAG and GlobalISel have separate expectations; the GlobalISel one
; additionally contains a shl/sshr #31 pair on the narrowed mask.
define <4 x i32> @v4f64_i32(<4 x double> %a, <4 x double> %b, <4 x i32> %d, <4 x i32> %e) {
; CHECK-SD-LABEL: v4f64_i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmgt v1.2d, v3.2d, v1.2d
; CHECK-SD-NEXT: fcmgt v0.2d, v2.2d, v0.2d
; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v5.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v4f64_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.2d, v2.2d, v0.2d
; CHECK-GI-NEXT: fcmgt v1.2d, v3.2d, v1.2d
; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <4 x double> %a, %b
  %s = select <4 x i1> %c, <4 x i32> %d, <4 x i32> %e
  ret <4 x i32> %s
}
|
|
|
|
; Lane-wise select on <2 x float>: each result lane is %d where %a < %b
; (ordered compare, so false when either operand is NaN) and %e otherwise.
; One expected sequence is shared by every RUN configuration: a 64-bit
; vector compare followed by a bitwise select.
define <2 x float> @v2f32_float(<2 x float> %a, <2 x float> %b, <2 x float> %d, <2 x float> %e) {
; CHECK-LABEL: v2f32_float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
entry:
  %c = fcmp olt <2 x float> %a, %b
  %s = select <2 x i1> %c, <2 x float> %d, <2 x float> %e
  ret <2 x float> %s
}
|
|
|
|
; Lane-wise select on <3 x float> (odd lane count): each result lane is %d
; where %a < %b (ordered compare) and %e otherwise.
; SelectionDAG and GlobalISel have separate expectations. The SelectionDAG
; one uses a four-lane compare and a bitwise select. The GlobalISel one
; builds shift-amount (31) and all-ones constants in three lanes and selects
; with and/eor/orr.
define <3 x float> @v3f32_float(<3 x float> %a, <3 x float> %b, <3 x float> %d, <3 x float> %e) {
; CHECK-SD-LABEL: v3f32_float:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3f32_float:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov v5.s[0], w9
; CHECK-GI-NEXT: mov v4.s[1], w8
; CHECK-GI-NEXT: mov v5.s[1], w9
; CHECK-GI-NEXT: mov v4.s[2], w8
; CHECK-GI-NEXT: mov v5.s[2], w9
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: neg v1.4s, v4.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v5.16b
; CHECK-GI-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v3.16b, v1.16b
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <3 x float> %a, %b
  %s = select <3 x i1> %c, <3 x float> %d, <3 x float> %e
  ret <3 x float> %s
}
|
|
|
|
; Lane-wise select on <4 x float>: each result lane is %d where %a < %b
; (ordered compare, so false when either operand is NaN) and %e otherwise.
; One expected sequence is shared by every RUN configuration: a 128-bit
; vector compare followed by a bitwise select.
define <4 x float> @v4f32_float(<4 x float> %a, <4 x float> %b, <4 x float> %d, <4 x float> %e) {
; CHECK-LABEL: v4f32_float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
entry:
  %c = fcmp olt <4 x float> %a, %b
  %s = select <4 x i1> %c, <4 x float> %d, <4 x float> %e
  ret <4 x float> %s
}
|
|
|
|
; Lane-wise select on <8 x float>: each result lane is %d where %a < %b
; (ordered compare) and %e otherwise. Every operand spans two 128-bit
; registers, so the expected code is two compare/select pairs.
; SelectionDAG and GlobalISel expectations contain the same four
; instructions and differ only in their order.
define <8 x float> @v8f32_float(<8 x float> %a, <8 x float> %b, <8 x float> %d, <8 x float> %e) {
; CHECK-SD-LABEL: v8f32_float:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmgt v1.4s, v3.4s, v1.4s
; CHECK-SD-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v8f32_float:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: fcmgt v1.4s, v3.4s, v1.4s
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <8 x float> %a, %b
  %s = select <8 x i1> %c, <8 x float> %d, <8 x float> %e
  ret <8 x float> %s
}
|
|
|
|
; Compare on <2 x float>, select on <2 x i32>: each result lane is %d where
; %a < %b (ordered compare) and %e otherwise. Mask and selected elements are
; both 32 bits wide, so the expected code needs no narrowing step.
; One expected sequence is shared by every RUN configuration.
define <2 x i32> @v2f32_i32(<2 x float> %a, <2 x float> %b, <2 x i32> %d, <2 x i32> %e) {
; CHECK-LABEL: v2f32_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: ret
entry:
  %c = fcmp olt <2 x float> %a, %b
  %s = select <2 x i1> %c, <2 x i32> %d, <2 x i32> %e
  ret <2 x i32> %s
}
|
|
|
|
; Compare on <3 x float>, select on <3 x i32> (odd lane count): each result
; lane is %d where %a < %b (ordered compare) and %e otherwise.
; SelectionDAG and GlobalISel have separate expectations. The SelectionDAG
; one uses a four-lane compare and a bitwise select. The GlobalISel one
; builds shift-amount (31) and all-ones constants in three lanes and selects
; with and/eor/orr.
define <3 x i32> @v3f32_i32(<3 x float> %a, <3 x float> %b, <3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: v3f32_i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v3f32_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov v5.s[0], w9
; CHECK-GI-NEXT: mov v4.s[1], w8
; CHECK-GI-NEXT: mov v5.s[1], w9
; CHECK-GI-NEXT: mov v4.s[2], w8
; CHECK-GI-NEXT: mov v5.s[2], w9
; CHECK-GI-NEXT: ushl v0.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: neg v1.4s, v4.4s
; CHECK-GI-NEXT: sshl v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: eor v1.16b, v0.16b, v5.16b
; CHECK-GI-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-NEXT: and v1.16b, v3.16b, v1.16b
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <3 x float> %a, %b
  %s = select <3 x i1> %c, <3 x i32> %d, <3 x i32> %e
  ret <3 x i32> %s
}
|
|
|
|
; Compare on <4 x float>, select on <4 x i32>: each result lane is %d where
; %a < %b (ordered compare) and %e otherwise. Mask and selected elements are
; both 32 bits wide, so the expected code needs no narrowing step.
; One expected sequence is shared by every RUN configuration.
define <4 x i32> @v4f32_i32(<4 x float> %a, <4 x float> %b, <4 x i32> %d, <4 x i32> %e) {
; CHECK-LABEL: v4f32_i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
entry:
  %c = fcmp olt <4 x float> %a, %b
  %s = select <4 x i1> %c, <4 x i32> %d, <4 x i32> %e
  ret <4 x i32> %s
}
|
|
|
|
; Compare on <8 x float>, select on <8 x i32>: each result lane is %d where
; %a < %b (ordered compare) and %e otherwise. Every operand spans two
; 128-bit registers, so the expected code is two compare/select pairs.
; SelectionDAG and GlobalISel expectations contain the same four
; instructions and differ only in their order.
define <8 x i32> @v8f32_i32(<8 x float> %a, <8 x float> %b, <8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-LABEL: v8f32_i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcmgt v1.4s, v3.4s, v1.4s
; CHECK-SD-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v8f32_i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: fcmgt v1.4s, v3.4s, v1.4s
; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NEXT: ret
entry:
  %c = fcmp olt <8 x float> %a, %b
  %s = select <8 x i1> %c, <8 x i32> %d, <8 x i32> %e
  ret <8 x i32> %s
}
|
|
|
|
; Lane-wise select on <7 x half> (odd lane count): each result lane is %d
; where %a < %b (ordered compare) and %e otherwise.
; There are four separate expectations, one per selector (SelectionDAG or
; GlobalISel) with and without +fullfp16:
;  - SelectionDAG without +fullfp16 converts every half lane to single
;    precision, compares lanes as scalars and inserts each result into the
;    mask vector.
;  - SelectionDAG with +fullfp16 uses one eight-lane half compare.
;  - GlobalISel without +fullfp16 widens with fcvtl and compares four-lane
;    single-precision vectors.
;  - Both GlobalISel variants build shift-amount (15) and 0xffff constants
;    lane by lane and select with and/eor/orr.
define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x half> %e) {
; CHECK-SD-NOFP16-LABEL: v7f16_half:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
; CHECK-SD-NOFP16-NEXT: fcvt s7, h0
; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fcmp s5, s4
; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[3]
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h16
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[4]
; CHECK-SD-NOFP16-NEXT: csetm w9, mi
; CHECK-SD-NOFP16-NEXT: fcmp s4, s7
; CHECK-SD-NOFP16-NEXT: fmov s4, w9
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: mov v4.h[1], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s6, s5
; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: mov v4.h[2], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: fcmp s16, s7
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[6]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: mov v4.h[3], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s6, s5
; CHECK-SD-NOFP16-NEXT: fcvt s5, h7
; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: mov v4.h[4], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s6, s5
; CHECK-SD-NOFP16-NEXT: mov v4.h[5], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s0, s1
; CHECK-SD-NOFP16-NEXT: mov v4.h[6], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v4.h[7], w8
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v4.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v7f16_half:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v7f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: mov w8, #15 // =0xf
; CHECK-GI-NOFP16-NEXT: mov v4.h[0], v0.h[4]
; CHECK-GI-NOFP16-NEXT: mov v6.h[0], v1.h[4]
; CHECK-GI-NOFP16-NEXT: fmov s5, w8
; CHECK-GI-NOFP16-NEXT: mov w9, #65535 // =0xffff
; CHECK-GI-NOFP16-NEXT: fmov s7, w9
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], w8
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v0.h[5]
; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v1.h[5]
; CHECK-GI-NOFP16-NEXT: mov v7.h[1], w9
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], w8
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v0.h[6]
; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v1.h[6]
; CHECK-GI-NOFP16-NEXT: mov v7.h[2], w9
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[3], w8
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v6.4h
; CHECK-GI-NOFP16-NEXT: mov v7.h[3], w9
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[4], w8
; CHECK-GI-NOFP16-NEXT: fcmgt v1.4s, v6.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v7.h[4], w9
; CHECK-GI-NOFP16-NEXT: mov v5.h[5], w8
; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-GI-NOFP16-NEXT: mov v7.h[5], w9
; CHECK-GI-NOFP16-NEXT: mov v5.h[6], w8
; CHECK-GI-NOFP16-NEXT: mov v7.h[6], w9
; CHECK-GI-NOFP16-NEXT: ushl v0.8h, v0.8h, v5.8h
; CHECK-GI-NOFP16-NEXT: neg v1.8h, v5.8h
; CHECK-GI-NOFP16-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-NOFP16-NEXT: eor v1.16b, v0.16b, v7.16b
; CHECK-GI-NOFP16-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-NOFP16-NEXT: and v1.16b, v3.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v7f16_half:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: mov w8, #15 // =0xf
; CHECK-GI-FP16-NEXT: mov w9, #65535 // =0xffff
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: fmov s4, w8
; CHECK-GI-FP16-NEXT: fmov s5, w9
; CHECK-GI-FP16-NEXT: mov v4.h[1], w8
; CHECK-GI-FP16-NEXT: mov v5.h[1], w9
; CHECK-GI-FP16-NEXT: mov v4.h[2], w8
; CHECK-GI-FP16-NEXT: mov v5.h[2], w9
; CHECK-GI-FP16-NEXT: mov v4.h[3], w8
; CHECK-GI-FP16-NEXT: mov v5.h[3], w9
; CHECK-GI-FP16-NEXT: mov v4.h[4], w8
; CHECK-GI-FP16-NEXT: mov v5.h[4], w9
; CHECK-GI-FP16-NEXT: mov v4.h[5], w8
; CHECK-GI-FP16-NEXT: mov v5.h[5], w9
; CHECK-GI-FP16-NEXT: mov v4.h[6], w8
; CHECK-GI-FP16-NEXT: mov v5.h[6], w9
; CHECK-GI-FP16-NEXT: ushl v0.8h, v0.8h, v4.8h
; CHECK-GI-FP16-NEXT: neg v1.8h, v4.8h
; CHECK-GI-FP16-NEXT: sshl v0.8h, v0.8h, v1.8h
; CHECK-GI-FP16-NEXT: eor v1.16b, v0.16b, v5.16b
; CHECK-GI-FP16-NEXT: and v0.16b, v2.16b, v0.16b
; CHECK-GI-FP16-NEXT: and v1.16b, v3.16b, v1.16b
; CHECK-GI-FP16-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <7 x half> %a, %b
  %s = select <7 x i1> %c, <7 x half> %d, <7 x half> %e
  ret <7 x half> %s
}
|
|
|
|
; Lane-wise select on <4 x half>: each result lane is %d where %a < %b
; (ordered compare) and %e otherwise.
; There are four separate expectations, one per selector (SelectionDAG or
; GlobalISel) with and without +fullfp16. Without +fullfp16 both selectors
; widen to single precision (fcvtl), compare four lanes and narrow the mask
; (xtn). With +fullfp16 both use a direct four-lane half compare.
define <4 x half> @v4f16_half(<4 x half> %a, <4 x half> %b, <4 x half> %d, <4 x half> %e) {
; CHECK-SD-NOFP16-LABEL: v4f16_half:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: xtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v4f16_half:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-SD-FP16-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v4f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v4f16_half:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-GI-FP16-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <4 x half> %a, %b
  %s = select <4 x i1> %c, <4 x half> %d, <4 x half> %e
  ret <4 x half> %s
}
|
|
|
|
; Lane-wise select on <8 x half>: each result lane is %d where %a < %b
; (ordered compare) and %e otherwise.
; There are four separate expectations, one per selector (SelectionDAG or
; GlobalISel) with and without +fullfp16:
;  - SelectionDAG without +fullfp16 converts every half lane to single
;    precision, compares lanes as scalars and inserts each result into the
;    mask vector.
;  - GlobalISel without +fullfp16 widens both halves of each operand
;    (fcvtl/fcvtl2), compares as two four-lane vectors and narrows the
;    masks with uzp1 followed by shl/sshr #15.
;  - With +fullfp16 both selectors use one eight-lane half compare.
define <8 x half> @v8f16_half(<8 x half> %a, <8 x half> %b, <8 x half> %d, <8 x half> %e) {
; CHECK-SD-NOFP16-LABEL: v8f16_half:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
; CHECK-SD-NOFP16-NEXT: fcvt s7, h0
; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fcmp s5, s4
; CHECK-SD-NOFP16-NEXT: mov h4, v0.h[2]
; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[3]
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h16
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[4]
; CHECK-SD-NOFP16-NEXT: csetm w9, mi
; CHECK-SD-NOFP16-NEXT: fcmp s4, s7
; CHECK-SD-NOFP16-NEXT: fmov s4, w9
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: mov v4.h[1], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s6, s5
; CHECK-SD-NOFP16-NEXT: mov h5, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h6, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: mov v4.h[2], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: fcmp s16, s7
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[6]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: mov v4.h[3], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s6, s5
; CHECK-SD-NOFP16-NEXT: fcvt s5, h7
; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: mov v4.h[4], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s6, s5
; CHECK-SD-NOFP16-NEXT: mov v4.h[5], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s0, s1
; CHECK-SD-NOFP16-NEXT: mov v4.h[6], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v4.h[7], w8
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v4.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v8f16_half:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v8f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v4.8h, v0.8h
; CHECK-GI-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-GI-NOFP16-NEXT: sshr v0.8h, v0.8h, #15
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v8f16_half:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <8 x half> %a, %b
  %s = select <8 x i1> %c, <8 x half> %d, <8 x half> %e
  ret <8 x half> %s
}
|
|
|
|
; Lane-wise select on <16 x half>: each result lane is %d where %a < %b
; (ordered compare) and %e otherwise. Every operand spans two 128-bit
; registers.
; There are four separate expectations, one per selector (SelectionDAG or
; GlobalISel) with and without +fullfp16:
;  - SelectionDAG without +fullfp16 converts every half lane to single
;    precision, compares lanes as scalars and inserts each result into one
;    of two mask vectors.
;  - GlobalISel without +fullfp16 widens with fcvtl/fcvtl2, compares as
;    four-lane vectors and narrows the masks with uzp1 followed by
;    shl/sshr #15.
;  - With +fullfp16 both selectors use two eight-lane half compares; the
;    two expectations differ only in instruction order.
define <16 x half> @v16f16_half(<16 x half> %a, <16 x half> %b, <16 x half> %d, <16 x half> %e) {
; CHECK-SD-NOFP16-LABEL: v16f16_half:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h16, v3.h[1]
; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h18, v3.h[2]
; CHECK-SD-NOFP16-NEXT: mov h19, v1.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s20, h3
; CHECK-SD-NOFP16-NEXT: fcvt s21, h1
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v3.h[3]
; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[3]
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s21, s20
; CHECK-SD-NOFP16-NEXT: mov h20, v3.h[4]
; CHECK-SD-NOFP16-NEXT: mov h21, v1.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: csetm w14, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v3.h[5]
; CHECK-SD-NOFP16-NEXT: mov h19, v1.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
; CHECK-SD-NOFP16-NEXT: csetm w13, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v3.h[6]
; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7]
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: csetm w11, mi
; CHECK-SD-NOFP16-NEXT: fcmp s21, s20
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: csetm w12, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[1]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: csetm w10, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
; CHECK-SD-NOFP16-NEXT: csetm w9, mi
; CHECK-SD-NOFP16-NEXT: fcmp s1, s3
; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: csetm w15, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v2.h[3]
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: csetm w16, mi
; CHECK-SD-NOFP16-NEXT: fcmp s3, s1
; CHECK-SD-NOFP16-NEXT: fmov s1, w14
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: csetm w14, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[4]
; CHECK-SD-NOFP16-NEXT: fmov s3, w14
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
; CHECK-SD-NOFP16-NEXT: mov v1.h[1], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v2.h[5]
; CHECK-SD-NOFP16-NEXT: mov v3.h[1], w16
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: mov v1.h[2], w13
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: mov v3.h[2], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[6]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
; CHECK-SD-NOFP16-NEXT: mov v1.h[3], w11
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: mov v3.h[3], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
; CHECK-SD-NOFP16-NEXT: mov v1.h[4], w12
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: mov v3.h[4], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov v1.h[5], w10
; CHECK-SD-NOFP16-NEXT: mov v3.h[5], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s0, s2
; CHECK-SD-NOFP16-NEXT: mov v1.h[6], w9
; CHECK-SD-NOFP16-NEXT: mov v3.h[6], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v1.h[7], w15
; CHECK-SD-NOFP16-NEXT: mov v3.h[7], w8
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v16f16_half:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v1.8h, v3.8h, v1.8h
; CHECK-SD-FP16-NEXT: fcmgt v0.8h, v2.8h, v0.8h
; CHECK-SD-FP16-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-SD-FP16-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v16f16_half:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-GI-NOFP16-NEXT: fcmgt v16.4s, v18.4s, v16.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v2.4s, v19.4s, v17.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v1.4s, v3.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: uzp1 v0.8h, v16.8h, v0.8h
; CHECK-GI-NOFP16-NEXT: uzp1 v1.8h, v2.8h, v1.8h
; CHECK-GI-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-GI-NOFP16-NEXT: shl v1.8h, v1.8h, #15
; CHECK-GI-NOFP16-NEXT: sshr v0.8h, v0.8h, #15
; CHECK-GI-NOFP16-NEXT: sshr v1.8h, v1.8h, #15
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v16f16_half:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-FP16-NEXT: fcmgt v1.8h, v3.8h, v1.8h
; CHECK-GI-FP16-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-GI-FP16-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <16 x half> %a, %b
  %s = select <16 x i1> %c, <16 x half> %d, <16 x half> %e
  ret <16 x half> %s
}
|
|
|
|
; Lane-wise select between the <7 x i32> vectors %d and %e, controlled by an
; ordered less-than compare (fcmp olt) of the <7 x half> vectors %a and %b.
; The FileCheck assertions inside the function are autogenerated (see the NOTE
; at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v7f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[1]
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
; CHECK-SD-NOFP16-NEXT: fcvt s7, h0
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fcmp s3, s2
; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[3]
; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[3]
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: csetm w9, mi
; CHECK-SD-NOFP16-NEXT: fcmp s5, s4
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[4]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: csetm w10, mi
; CHECK-SD-NOFP16-NEXT: fcmp s3, s2
; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: fcvt s3, h5
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[6]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: csetm w11, mi
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: csetm w12, mi
; CHECK-SD-NOFP16-NEXT: fcmp s3, s2
; CHECK-SD-NOFP16-NEXT: fcvt s2, h4
; CHECK-SD-NOFP16-NEXT: fcvt s3, h5
; CHECK-SD-NOFP16-NEXT: fmov s4, w9
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: add x9, sp, #8
; CHECK-SD-NOFP16-NEXT: csetm w13, mi
; CHECK-SD-NOFP16-NEXT: fmov s5, w13
; CHECK-SD-NOFP16-NEXT: mov v4.h[1], w8
; CHECK-SD-NOFP16-NEXT: mov x8, sp
; CHECK-SD-NOFP16-NEXT: fcmp s3, s2
; CHECK-SD-NOFP16-NEXT: fmov s2, w7
; CHECK-SD-NOFP16-NEXT: fmov s3, w0
; CHECK-SD-NOFP16-NEXT: mov v5.h[1], w12
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-SD-NOFP16-NEXT: mov v3.s[1], w1
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v4.h[2], w10
; CHECK-SD-NOFP16-NEXT: fcmp s0, s1
; CHECK-SD-NOFP16-NEXT: fmov s1, w4
; CHECK-SD-NOFP16-NEXT: ldr s0, [sp, #24]
; CHECK-SD-NOFP16-NEXT: mov v5.h[2], w8
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[2], [x9]
; CHECK-SD-NOFP16-NEXT: add x9, sp, #32
; CHECK-SD-NOFP16-NEXT: mov v3.s[2], w2
; CHECK-SD-NOFP16-NEXT: mov v1.s[1], w5
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v4.h[3], w11
; CHECK-SD-NOFP16-NEXT: ld1 { v0.s }[1], [x9]
; CHECK-SD-NOFP16-NEXT: mov v5.h[3], w8
; CHECK-SD-NOFP16-NEXT: add x8, sp, #16
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[3], [x8]
; CHECK-SD-NOFP16-NEXT: mov v3.s[3], w3
; CHECK-SD-NOFP16-NEXT: add x8, sp, #40
; CHECK-SD-NOFP16-NEXT: mov v1.s[2], w6
; CHECK-SD-NOFP16-NEXT: sshll v4.4s, v4.4h, #0
; CHECK-SD-NOFP16-NEXT: ld1 { v0.s }[2], [x8]
; CHECK-SD-NOFP16-NEXT: sshll v5.4s, v5.4h, #0
; CHECK-SD-NOFP16-NEXT: bit v2.16b, v3.16b, v4.16b
; CHECK-SD-NOFP16-NEXT: bit v0.16b, v1.16b, v5.16b
; CHECK-SD-NOFP16-NEXT: mov w1, v2.s[1]
; CHECK-SD-NOFP16-NEXT: mov w2, v2.s[2]
; CHECK-SD-NOFP16-NEXT: mov w3, v2.s[3]
; CHECK-SD-NOFP16-NEXT: fmov w0, s2
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v7f16_i32:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmov s2, w0
; CHECK-SD-FP16-NEXT: fmov s3, w7
; CHECK-SD-FP16-NEXT: mov x8, sp
; CHECK-SD-FP16-NEXT: fmov s5, w4
; CHECK-SD-FP16-NEXT: ldr s4, [sp, #24]
; CHECK-SD-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: add x9, sp, #32
; CHECK-SD-FP16-NEXT: mov v2.s[1], w1
; CHECK-SD-FP16-NEXT: ld1 { v3.s }[1], [x8]
; CHECK-SD-FP16-NEXT: add x8, sp, #8
; CHECK-SD-FP16-NEXT: mov v5.s[1], w5
; CHECK-SD-FP16-NEXT: ld1 { v4.s }[1], [x9]
; CHECK-SD-FP16-NEXT: add x9, sp, #16
; CHECK-SD-FP16-NEXT: sshll v1.4s, v0.4h, #0
; CHECK-SD-FP16-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-SD-FP16-NEXT: ld1 { v3.s }[2], [x8]
; CHECK-SD-FP16-NEXT: add x8, sp, #40
; CHECK-SD-FP16-NEXT: mov v2.s[2], w2
; CHECK-SD-FP16-NEXT: ld1 { v4.s }[2], [x8]
; CHECK-SD-FP16-NEXT: mov v5.s[2], w6
; CHECK-SD-FP16-NEXT: ld1 { v3.s }[3], [x9]
; CHECK-SD-FP16-NEXT: mov v2.s[3], w3
; CHECK-SD-FP16-NEXT: bsl v0.16b, v5.16b, v4.16b
; CHECK-SD-FP16-NEXT: bsl v1.16b, v2.16b, v3.16b
; CHECK-SD-FP16-NEXT: mov w5, v0.s[1]
; CHECK-SD-FP16-NEXT: mov w6, v0.s[2]
; CHECK-SD-FP16-NEXT: fmov w4, s0
; CHECK-SD-FP16-NEXT: mov w1, v1.s[1]
; CHECK-SD-FP16-NEXT: mov w2, v1.s[2]
; CHECK-SD-FP16-NEXT: mov w3, v1.s[3]
; CHECK-SD-FP16-NEXT: fmov w0, s1
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v7f16_i32:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4]
; CHECK-GI-NOFP16-NEXT: mov v3.h[0], v1.h[4]
; CHECK-GI-NOFP16-NEXT: mov w8, #31 // =0x1f
; CHECK-GI-NOFP16-NEXT: mov v4.s[0], w8
; CHECK-GI-NOFP16-NEXT: mov w9, #-1 // =0xffffffff
; CHECK-GI-NOFP16-NEXT: mov v5.s[0], w0
; CHECK-GI-NOFP16-NEXT: mov v6.s[0], w9
; CHECK-GI-NOFP16-NEXT: mov v7.s[0], w7
; CHECK-GI-NOFP16-NEXT: ldr s16, [sp]
; CHECK-GI-NOFP16-NEXT: ldr s17, [sp, #24]
; CHECK-GI-NOFP16-NEXT: ldr s18, [sp, #32]
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5]
; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v1.h[5]
; CHECK-GI-NOFP16-NEXT: mov v4.s[1], w8
; CHECK-GI-NOFP16-NEXT: mov v5.s[1], w1
; CHECK-GI-NOFP16-NEXT: mov v17.s[1], v18.s[0]
; CHECK-GI-NOFP16-NEXT: mov v6.s[1], w9
; CHECK-GI-NOFP16-NEXT: mov v7.s[1], v16.s[0]
; CHECK-GI-NOFP16-NEXT: ldr s16, [sp, #8]
; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6]
; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: mov v4.s[2], w8
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v5.s[2], w2
; CHECK-GI-NOFP16-NEXT: mov v6.s[2], w9
; CHECK-GI-NOFP16-NEXT: mov v7.s[2], v16.s[0]
; CHECK-GI-NOFP16-NEXT: ldr s16, [sp, #40]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: mov v17.s[2], v16.s[0]
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.s[3], w3
; CHECK-GI-NOFP16-NEXT: fcmgt v2.4s, v3.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v3.s[0], w4
; CHECK-GI-NOFP16-NEXT: ushl v2.4s, v2.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: neg v4.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v3.s[1], w5
; CHECK-GI-NOFP16-NEXT: sshl v2.4s, v2.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: ldr s4, [sp, #16]
; CHECK-GI-NOFP16-NEXT: mov v3.s[2], w6
; CHECK-GI-NOFP16-NEXT: mov v7.s[3], v4.s[0]
; CHECK-GI-NOFP16-NEXT: eor v1.16b, v2.16b, v6.16b
; CHECK-GI-NOFP16-NEXT: and v2.16b, v3.16b, v2.16b
; CHECK-GI-NOFP16-NEXT: and v1.16b, v17.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v5.16b, v7.16b
; CHECK-GI-NOFP16-NEXT: orr v1.16b, v2.16b, v1.16b
; CHECK-GI-NOFP16-NEXT: mov s2, v0.s[1]
; CHECK-GI-NOFP16-NEXT: mov s3, v0.s[2]
; CHECK-GI-NOFP16-NEXT: mov s4, v0.s[3]
; CHECK-GI-NOFP16-NEXT: fmov w0, s0
; CHECK-GI-NOFP16-NEXT: mov s5, v1.s[1]
; CHECK-GI-NOFP16-NEXT: mov s6, v1.s[2]
; CHECK-GI-NOFP16-NEXT: fmov w4, s1
; CHECK-GI-NOFP16-NEXT: fmov w1, s2
; CHECK-GI-NOFP16-NEXT: fmov w2, s3
; CHECK-GI-NOFP16-NEXT: fmov w3, s4
; CHECK-GI-NOFP16-NEXT: fmov w5, s5
; CHECK-GI-NOFP16-NEXT: fmov w6, s6
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v7f16_i32:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: mov w9, #31 // =0x1f
; CHECK-GI-FP16-NEXT: mov v4.s[0], w0
; CHECK-GI-FP16-NEXT: mov v2.s[0], w9
; CHECK-GI-FP16-NEXT: mov v5.s[0], w7
; CHECK-GI-FP16-NEXT: ldr s6, [sp]
; CHECK-GI-FP16-NEXT: mov v7.s[0], w4
; CHECK-GI-FP16-NEXT: ldr s16, [sp, #32]
; CHECK-GI-FP16-NEXT: ldr s17, [sp, #8]
; CHECK-GI-FP16-NEXT: umov w8, v0.h[4]
; CHECK-GI-FP16-NEXT: umov w10, v0.h[5]
; CHECK-GI-FP16-NEXT: mov v4.s[1], w1
; CHECK-GI-FP16-NEXT: mov v2.s[1], w9
; CHECK-GI-FP16-NEXT: mov v5.s[1], v6.s[0]
; CHECK-GI-FP16-NEXT: ldr s6, [sp, #24]
; CHECK-GI-FP16-NEXT: mov v7.s[1], w5
; CHECK-GI-FP16-NEXT: mov v6.s[1], v16.s[0]
; CHECK-GI-FP16-NEXT: ldr s16, [sp, #40]
; CHECK-GI-FP16-NEXT: mov v1.s[0], w8
; CHECK-GI-FP16-NEXT: umov w8, v0.h[6]
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: mov v2.s[2], w9
; CHECK-GI-FP16-NEXT: mov v4.s[2], w2
; CHECK-GI-FP16-NEXT: mov v5.s[2], v17.s[0]
; CHECK-GI-FP16-NEXT: mov v7.s[2], w6
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v6.s[2], v16.s[0]
; CHECK-GI-FP16-NEXT: mov v1.s[1], w10
; CHECK-GI-FP16-NEXT: mov w10, #-1 // =0xffffffff
; CHECK-GI-FP16-NEXT: mov v3.s[0], w10
; CHECK-GI-FP16-NEXT: mov v4.s[3], w3
; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v1.s[2], w8
; CHECK-GI-FP16-NEXT: mov v3.s[1], w10
; CHECK-GI-FP16-NEXT: ushl v1.4s, v1.4s, v2.4s
; CHECK-GI-FP16-NEXT: neg v2.4s, v2.4s
; CHECK-GI-FP16-NEXT: mov v3.s[2], w10
; CHECK-GI-FP16-NEXT: sshl v1.4s, v1.4s, v2.4s
; CHECK-GI-FP16-NEXT: ldr s2, [sp, #16]
; CHECK-GI-FP16-NEXT: mov v5.s[3], v2.s[0]
; CHECK-GI-FP16-NEXT: eor v3.16b, v1.16b, v3.16b
; CHECK-GI-FP16-NEXT: and v1.16b, v7.16b, v1.16b
; CHECK-GI-FP16-NEXT: and v2.16b, v6.16b, v3.16b
; CHECK-GI-FP16-NEXT: bsl v0.16b, v4.16b, v5.16b
; CHECK-GI-FP16-NEXT: orr v1.16b, v1.16b, v2.16b
; CHECK-GI-FP16-NEXT: mov s2, v0.s[1]
; CHECK-GI-FP16-NEXT: mov s3, v0.s[2]
; CHECK-GI-FP16-NEXT: mov s4, v0.s[3]
; CHECK-GI-FP16-NEXT: fmov w0, s0
; CHECK-GI-FP16-NEXT: mov s5, v1.s[1]
; CHECK-GI-FP16-NEXT: mov s6, v1.s[2]
; CHECK-GI-FP16-NEXT: fmov w4, s1
; CHECK-GI-FP16-NEXT: fmov w1, s2
; CHECK-GI-FP16-NEXT: fmov w2, s3
; CHECK-GI-FP16-NEXT: fmov w3, s4
; CHECK-GI-FP16-NEXT: fmov w5, s5
; CHECK-GI-FP16-NEXT: fmov w6, s6
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <7 x half> %a, %b
  %s = select <7 x i1> %c, <7 x i32> %d, <7 x i32> %e
  ret <7 x i32> %s
}
|
|
|
|
; Lane-wise select between the <4 x i32> vectors %d and %e, controlled by an
; ordered less-than compare (fcmp olt) of the <4 x half> vectors %a and %b.
; The FileCheck assertions inside the function are autogenerated (see the NOTE
; at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i32> @v4f16_i32(<4 x half> %a, <4 x half> %b, <4 x i32> %d, <4 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v4f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v4f16_i32:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-SD-FP16-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-SD-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v4f16_i32:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v4f16_i32:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <4 x half> %a, %b
  %s = select <4 x i1> %c, <4 x i32> %d, <4 x i32> %e
  ret <4 x i32> %s
}
|
|
|
|
; Lane-wise select between the <8 x i32> vectors %d and %e, controlled by an
; ordered less-than compare (fcmp olt) of the <8 x half> vectors %a and %b.
; The FileCheck assertions inside the function are autogenerated (see the NOTE
; at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v8f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[5]
; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[4]
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[4]
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: mov h6, v1.h[7]
; CHECK-SD-NOFP16-NEXT: mov h7, v0.h[7]
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: csetm w9, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: csetm w10, mi
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s6, h1
; CHECK-SD-NOFP16-NEXT: fcvt s7, h0
; CHECK-SD-NOFP16-NEXT: csetm w11, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[2]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3]
; CHECK-SD-NOFP16-NEXT: csetm w12, mi
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s6, h16
; CHECK-SD-NOFP16-NEXT: fmov s16, w9
; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: csetm w13, mi
; CHECK-SD-NOFP16-NEXT: fmov s17, w13
; CHECK-SD-NOFP16-NEXT: mov v16.h[1], w8
; CHECK-SD-NOFP16-NEXT: fcmp s7, s6
; CHECK-SD-NOFP16-NEXT: mov v17.h[1], w12
; CHECK-SD-NOFP16-NEXT: mov v16.h[2], w10
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s0, s1
; CHECK-SD-NOFP16-NEXT: mov v17.h[2], w8
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v16.h[3], w11
; CHECK-SD-NOFP16-NEXT: mov v17.h[3], w8
; CHECK-SD-NOFP16-NEXT: sshll v1.4s, v16.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll v0.4s, v17.4h, #0
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v3.16b, v5.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v4.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v8f16_i32:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: sshll v6.4s, v0.4h, #0
; CHECK-SD-FP16-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-SD-FP16-NEXT: mov v1.16b, v0.16b
; CHECK-SD-FP16-NEXT: mov v0.16b, v6.16b
; CHECK-SD-FP16-NEXT: bsl v1.16b, v3.16b, v5.16b
; CHECK-SD-FP16-NEXT: bsl v0.16b, v2.16b, v4.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v8f16_i32:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v1.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v0.16b, v6.16b
; CHECK-GI-NOFP16-NEXT: bsl v1.16b, v3.16b, v5.16b
; CHECK-GI-NOFP16-NEXT: bsl v0.16b, v2.16b, v4.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v8f16_i32:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: ushll v1.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-FP16-NEXT: sshr v6.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v0.16b, v1.16b
; CHECK-GI-FP16-NEXT: mov v1.16b, v6.16b
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v4.16b
; CHECK-GI-FP16-NEXT: bsl v1.16b, v3.16b, v5.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <8 x half> %a, %b
  %s = select <8 x i1> %c, <8 x i32> %d, <8 x i32> %e
  ret <8 x i32> %s
}
|
|
|
|
; Lane-wise select between the <16 x i32> vectors %d and %e, controlled by an
; ordered less-than compare (fcmp olt) of the <16 x half> vectors %a and %b.
; The FileCheck assertions inside the function are autogenerated (see the NOTE
; at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v16f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h16, v3.h[5]
; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h18, v3.h[4]
; CHECK-SD-NOFP16-NEXT: mov h19, v1.h[4]
; CHECK-SD-NOFP16-NEXT: mov h20, v3.h[6]
; CHECK-SD-NOFP16-NEXT: mov h21, v1.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v3.h[7]
; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[7]
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v3.h[1]
; CHECK-SD-NOFP16-NEXT: mov h19, v1.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: csetm w10, mi
; CHECK-SD-NOFP16-NEXT: fcmp s21, s20
; CHECK-SD-NOFP16-NEXT: fcvt s20, h3
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s21, h1
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: csetm w9, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v3.h[2]
; CHECK-SD-NOFP16-NEXT: mov h17, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[3]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3]
; CHECK-SD-NOFP16-NEXT: csetm w11, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: csetm w12, mi
; CHECK-SD-NOFP16-NEXT: fcmp s21, s20
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: csetm w14, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v2.h[4]
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[4]
; CHECK-SD-NOFP16-NEXT: csetm w13, mi
; CHECK-SD-NOFP16-NEXT: fcmp s1, s3
; CHECK-SD-NOFP16-NEXT: mov h1, v2.h[6]
; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: csetm w15, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: mov h18, v2.h[7]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[7]
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: csetm w16, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: mov h16, v2.h[1]
; CHECK-SD-NOFP16-NEXT: mov h17, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: csetm w17, mi
; CHECK-SD-NOFP16-NEXT: fcmp s3, s1
; CHECK-SD-NOFP16-NEXT: fcvt s1, h16
; CHECK-SD-NOFP16-NEXT: fcvt s16, h2
; CHECK-SD-NOFP16-NEXT: fcvt s3, h17
; CHECK-SD-NOFP16-NEXT: fcvt s17, h0
; CHECK-SD-NOFP16-NEXT: csetm w18, mi
; CHECK-SD-NOFP16-NEXT: fcmp s19, s18
; CHECK-SD-NOFP16-NEXT: fmov s18, w14
; CHECK-SD-NOFP16-NEXT: fmov s19, w17
; CHECK-SD-NOFP16-NEXT: csetm w0, mi
; CHECK-SD-NOFP16-NEXT: fcmp s3, s1
; CHECK-SD-NOFP16-NEXT: mov h1, v2.h[2]
; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3]
; CHECK-SD-NOFP16-NEXT: mov v18.h[1], w12
; CHECK-SD-NOFP16-NEXT: mov v19.h[1], w16
; CHECK-SD-NOFP16-NEXT: csetm w1, mi
; CHECK-SD-NOFP16-NEXT: fcmp s17, s16
; CHECK-SD-NOFP16-NEXT: fmov s16, w10
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: csetm w2, mi
; CHECK-SD-NOFP16-NEXT: mov v16.h[1], w8
; CHECK-SD-NOFP16-NEXT: mov v18.h[2], w13
; CHECK-SD-NOFP16-NEXT: fmov s17, w2
; CHECK-SD-NOFP16-NEXT: mov v19.h[2], w18
; CHECK-SD-NOFP16-NEXT: fcmp s3, s1
; CHECK-SD-NOFP16-NEXT: mov v17.h[1], w1
; CHECK-SD-NOFP16-NEXT: mov v16.h[2], w9
; CHECK-SD-NOFP16-NEXT: mov v18.h[3], w15
; CHECK-SD-NOFP16-NEXT: mov v19.h[3], w0
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: fcmp s0, s2
; CHECK-SD-NOFP16-NEXT: mov v17.h[2], w8
; CHECK-SD-NOFP16-NEXT: mov v16.h[3], w11
; CHECK-SD-NOFP16-NEXT: csetm w8, mi
; CHECK-SD-NOFP16-NEXT: mov v17.h[3], w8
; CHECK-SD-NOFP16-NEXT: sshll v2.4s, v16.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll v16.4s, v18.4h, #0
; CHECK-SD-NOFP16-NEXT: ldp q0, q18, [sp]
; CHECK-SD-NOFP16-NEXT: sshll v1.4s, v17.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll v17.4s, v19.4h, #0
; CHECK-SD-NOFP16-NEXT: ldp q19, q3, [sp, #32]
; CHECK-SD-NOFP16-NEXT: bit v0.16b, v4.16b, v1.16b
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v17.16b
; CHECK-SD-NOFP16-NEXT: bit v3.16b, v7.16b, v2.16b
; CHECK-SD-NOFP16-NEXT: mov v2.16b, v16.16b
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v18.16b
; CHECK-SD-NOFP16-NEXT: bsl v2.16b, v6.16b, v19.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v16f16_i32:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fcmgt v0.8h, v2.8h, v0.8h
; CHECK-SD-FP16-NEXT: fcmgt v1.8h, v3.8h, v1.8h
; CHECK-SD-FP16-NEXT: ldp q2, q20, [sp]
; CHECK-SD-FP16-NEXT: ldp q18, q19, [sp, #32]
; CHECK-SD-FP16-NEXT: sshll v3.4s, v0.4h, #0
; CHECK-SD-FP16-NEXT: sshll v16.4s, v1.4h, #0
; CHECK-SD-FP16-NEXT: sshll2 v17.4s, v1.8h, #0
; CHECK-SD-FP16-NEXT: sshll2 v1.4s, v0.8h, #0
; CHECK-SD-FP16-NEXT: mov v0.16b, v3.16b
; CHECK-SD-FP16-NEXT: mov v3.16b, v17.16b
; CHECK-SD-FP16-NEXT: bsl v1.16b, v5.16b, v20.16b
; CHECK-SD-FP16-NEXT: bsl v0.16b, v4.16b, v2.16b
; CHECK-SD-FP16-NEXT: mov v2.16b, v16.16b
; CHECK-SD-FP16-NEXT: bsl v3.16b, v7.16b, v19.16b
; CHECK-SD-FP16-NEXT: bsl v2.16b, v6.16b, v18.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: v16f16_i32:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-GI-NOFP16-NEXT: fcmgt v2.4s, v2.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v17.4s, v19.4s, v17.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v16.4s, v18.4s, v16.4s
; CHECK-GI-NOFP16-NEXT: fcmgt v3.4s, v3.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: ldp q0, q1, [sp]
; CHECK-GI-NOFP16-NEXT: ldp q18, q19, [sp, #32]
; CHECK-GI-NOFP16-NEXT: bit v1.16b, v5.16b, v2.16b
; CHECK-GI-NOFP16-NEXT: mov v2.16b, v17.16b
; CHECK-GI-NOFP16-NEXT: bit v0.16b, v4.16b, v16.16b
; CHECK-GI-NOFP16-NEXT: bsl v3.16b, v7.16b, v19.16b
; CHECK-GI-NOFP16-NEXT: bsl v2.16b, v6.16b, v18.16b
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: v16f16_i32:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcmgt v0.8h, v2.8h, v0.8h
; CHECK-GI-FP16-NEXT: fcmgt v1.8h, v3.8h, v1.8h
; CHECK-GI-FP16-NEXT: ldp q18, q19, [sp, #32]
; CHECK-GI-FP16-NEXT: ushll v2.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-FP16-NEXT: ushll v3.4s, v1.4h, #0
; CHECK-GI-FP16-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-FP16-NEXT: shl v2.4s, v2.4s, #31
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: shl v3.4s, v3.4s, #31
; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31
; CHECK-GI-FP16-NEXT: sshr v2.4s, v2.4s, #31
; CHECK-GI-FP16-NEXT: sshr v16.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: sshr v3.4s, v3.4s, #31
; CHECK-GI-FP16-NEXT: sshr v17.4s, v1.4s, #31
; CHECK-GI-FP16-NEXT: ldp q0, q1, [sp]
; CHECK-GI-FP16-NEXT: bit v0.16b, v4.16b, v2.16b
; CHECK-GI-FP16-NEXT: mov v2.16b, v3.16b
; CHECK-GI-FP16-NEXT: mov v3.16b, v17.16b
; CHECK-GI-FP16-NEXT: bit v1.16b, v5.16b, v16.16b
; CHECK-GI-FP16-NEXT: bsl v2.16b, v6.16b, v18.16b
; CHECK-GI-FP16-NEXT: bsl v3.16b, v7.16b, v19.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %c = fcmp olt <16 x half> %a, %b
  %s = select <16 x i1> %c, <16 x i32> %d, <16 x i32> %e
  ret <16 x i32> %s
}
|