Some MVE instructions have qr variants that take a Q register and an R register, splatting the R register across every lane. This is usually handled fine for standard splats, as we sink the splat into the loop and combine the resulting dup into the qr instruction. It does not work for constant splats, though, because we generate a vmovimm or a constant pool load instead. This patch intercepts that case, generating a vdup of the constant wherever the result can be turned into a qr instruction variant. Differential Revision: https://reviews.llvm.org/D115242
254 lines
7.8 KiB
LLVM
254 lines
7.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
|
|
|
|
; Select between two <4 x i32> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <4 x i32> @test_v4i32(i32 %x, <4 x i32> %s0, <4 x i32> %s1) {
|
|
; CHECK-LABEL: test_v4i32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB0_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <4 x i32> %s0, <4 x i32> %s1
|
|
ret <4 x i32> %s
|
|
}
|
|
|
|
; Select between two <8 x i16> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <8 x i16> @test_v8i16(i32 %x, <8 x i16> %s0, <8 x i16> %s1) {
|
|
; CHECK-LABEL: test_v8i16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB1_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <8 x i16> %s0, <8 x i16> %s1
|
|
ret <8 x i16> %s
|
|
}
|
|
|
|
; Select between two <16 x i8> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <16 x i8> @test_v16i8(i32 %x, <16 x i8> %s0, <16 x i8> %s1) {
|
|
; CHECK-LABEL: test_v16i8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB2_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <16 x i8> %s0, <16 x i8> %s1
|
|
ret <16 x i8> %s
|
|
}
|
|
|
|
; Select between two <2 x i64> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <2 x i64> @test_v2i64(i32 %x, <2 x i64> %s0, <2 x i64> %s1) {
|
|
; CHECK-LABEL: test_v2i64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB3_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <2 x i64> %s0, <2 x i64> %s1
|
|
ret <2 x i64> %s
|
|
}
|
|
|
|
; Select between two <4 x float> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <4 x float> @test_v4float(i32 %x, <4 x float> %s0, <4 x float> %s1) {
|
|
; CHECK-LABEL: test_v4float:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB4_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <4 x float> %s0, <4 x float> %s1
|
|
ret <4 x float> %s
|
|
}
|
|
|
|
; Select between two <8 x half> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <8 x half> @test_v8half(i32 %x, <8 x half> %s0, <8 x half> %s1) {
|
|
; CHECK-LABEL: test_v8half:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB5_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <8 x half> %s0, <8 x half> %s1
|
|
ret <8 x half> %s
|
|
}
|
|
|
|
; Select between two <2 x double> vectors on a scalar condition: the result is
; %s0 when %x is zero and %s1 otherwise.
; The expected output branches on r0 instead of building a vector mask: it
; returns early when r0 is zero, leaving q0 untouched, and otherwise copies
; q1 into q0 before returning.
define arm_aapcs_vfpcc <2 x double> @test_v2double(i32 %x, <2 x double> %s0, <2 x double> %s1) {
|
|
; CHECK-LABEL: test_v2double:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cmp r0, #0
|
|
; CHECK-NEXT: it eq
|
|
; CHECK-NEXT: bxeq lr
|
|
; CHECK-NEXT: .LBB6_1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <2 x double> %s0, <2 x double> %s1
|
|
ret <2 x double> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <4 x i32> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <4 x i32> @minsize_v4i32(i32 %x, <4 x i32> %s0, <4 x i32> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v4i32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB7_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB7_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <4 x i32> %s0, <4 x i32> %s1
|
|
ret <4 x i32> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <8 x i16> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <8 x i16> @minsize_v8i16(i32 %x, <8 x i16> %s0, <8 x i16> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v8i16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB8_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB8_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <8 x i16> %s0, <8 x i16> %s1
|
|
ret <8 x i16> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <16 x i8> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <16 x i8> @minsize_v16i8(i32 %x, <16 x i8> %s0, <16 x i8> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v16i8:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB9_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB9_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <16 x i8> %s0, <16 x i8> %s1
|
|
ret <16 x i8> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <2 x i64> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <2 x i64> @minsize_v2i64(i32 %x, <2 x i64> %s0, <2 x i64> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v2i64:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB10_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB10_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <2 x i64> %s0, <2 x i64> %s1
|
|
ret <2 x i64> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <4 x float> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <4 x float> @minsize_v4float(i32 %x, <4 x float> %s0, <4 x float> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v4float:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB11_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB11_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <4 x float> %s0, <4 x float> %s1
|
|
ret <4 x float> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <8 x half> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <8 x half> @minsize_v8half(i32 %x, <8 x half> %s0, <8 x half> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v8half:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB12_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB12_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <8 x half> %s0, <8 x half> %s1
|
|
ret <8 x half> %s
|
|
}
|
|
|
|
; Scalar-condition select between two <2 x double> vectors in a function marked
; minsize: the result is %s0 when %x is zero and %s1 otherwise.
; The expected output uses cbz to jump over the `vmov q0, q1` when r0 is zero,
; so both paths fall into a single shared `bx lr`.
define arm_aapcs_vfpcc <2 x double> @minsize_v2double(i32 %x, <2 x double> %s0, <2 x double> %s1) minsize {
|
|
; CHECK-LABEL: minsize_v2double:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: cbz r0, .LBB13_2
|
|
; CHECK-NEXT: @ %bb.1: @ %select.false
|
|
; CHECK-NEXT: vmov q0, q1
|
|
; CHECK-NEXT: .LBB13_2: @ %select.end
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%c = icmp eq i32 %x, 0
|
|
%s = select i1 %c, <2 x double> %s0, <2 x double> %s1
|
|
ret <2 x double> %s
|
|
}
|
|
|
|
; Loop test with a vector induction variable. %vec.ind starts at <0,1,2,3>,
; is advanced by a splat of 4 each iteration, and is reset to <0,1,2,3> by a
; scalar-condition select whenever the scalar index reaches 8. Both blocks end
; in unconditional branches, so the loop has no exit and the function never
; returns.
; The expected output keeps the constant 4 in r0 and adds it with the
; vector-by-scalar form `vadd.i32 q1, q1, r0`. The vector select is done
; without a branch: csetm/vdup.32 build an all-lanes mask, and vbic/vand/vorr
; blend the incremented vector with the <0,1,2,3> constant loaded from the
; constant pool.
define i32 @e() {
|
|
; CHECK-LABEL: e:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: adr r0, .LCPI14_0
|
|
; CHECK-NEXT: movs r1, #0
|
|
; CHECK-NEXT: vldrw.u32 q0, [r0]
|
|
; CHECK-NEXT: movs r0, #4
|
|
; CHECK-NEXT: vmov q1, q0
|
|
; CHECK-NEXT: .LBB14_1: @ %vector.body
|
|
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-NEXT: adds r1, #4
|
|
; CHECK-NEXT: vadd.i32 q1, q1, r0
|
|
; CHECK-NEXT: cmp r1, #8
|
|
; CHECK-NEXT: csetm r2, eq
|
|
; CHECK-NEXT: subs.w r3, r1, #8
|
|
; CHECK-NEXT: vdup.32 q2, r2
|
|
; CHECK-NEXT: csel r1, r1, r3, ne
|
|
; CHECK-NEXT: vbic q1, q1, q2
|
|
; CHECK-NEXT: vand q2, q2, q0
|
|
; CHECK-NEXT: vorr q1, q2, q1
|
|
; CHECK-NEXT: b .LBB14_1
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: @ %bb.2:
|
|
; CHECK-NEXT: .LCPI14_0:
|
|
; CHECK-NEXT: .long 0 @ 0x0
|
|
; CHECK-NEXT: .long 1 @ 0x1
|
|
; CHECK-NEXT: .long 2 @ 0x2
|
|
; CHECK-NEXT: .long 3 @ 0x3
|
|
entry:
|
|
br label %vector.body
|
|
|
|
vector.body: ; preds = %pred.store.continue73, %entry
|
|
%index = phi i32 [ 0, %entry ], [ %spec.select, %pred.store.continue73 ]
|
|
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %spec.select74, %pred.store.continue73 ]
|
|
; %l4 has no users in this function, so the compare/extract pair below is
; dead; none of the expected instructions above correspond to it.
%l3 = icmp ult <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5>
|
|
%l4 = extractelement <4 x i1> %l3, i32 0
|
|
br label %pred.store.continue73
|
|
|
|
pred.store.continue73: ; preds = %vector.body
|
|
%index.next = add i32 %index, 4
|
|
; Add of a constant splat: expected to use the vector-by-scalar vadd with the
; constant held in a general-purpose register.
%vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
|
|
%l60 = icmp eq i32 %index.next, 8
|
|
; Both selects share the scalar condition %l60: when the index reaches 8 the
; scalar index wraps to 0 and the vector resets to <0,1,2,3>.
%spec.select = select i1 %l60, i32 0, i32 %index.next
|
|
%spec.select74 = select i1 %l60, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> %vec.ind.next
|
|
br label %vector.body
|
|
}
|
|
|