Files
clang-p2996/llvm/test/CodeGen/ARM/aes-erratum-fix.ll
Archibald Elliott 20b2d11896 [ARM] Fix Crash in 't'/'w' handling without fp16/bf16
After https://reviews.llvm.org/rGff4027d152d0 and
https://reviews.llvm.org/rG7d15212b8c0c we saw crashes in SelectionDAG
when trying to use these constraints when you don't have the fp16 or
bf16 extensions.

However, it is still possible to move 16-bit floating point values into
the right place in S registers with a normal `vmov`, even if we don't
have fp16 instructions we can use within the inline assembly string.
This patch therefore fixes the crash.

I think the reason we weren't getting this crash before is because I
think the __fp16 and __bf16 types got an error diagnostic in the Clang
frontend when you didn't have the right architectural extensions to use
them. This restriction was recently relaxed.

The approach for bf16 needs a bit more explanation. Exactly how BF16 is
legalized was changed in rGb769eb02b526e3966847351e15d283514c2ec767 -
effectively, whether you have the right instructions to get a bf16 value
into/out of a S register with MoveTo/FromHPR depends on hasFullFP16, but
whether you use a HPR for a value of type MVT::bf16 depends on hasBF16.
This is why the tests are not changed by `+bf16` vs `-bf16`, but I've
left both sets of RUN lines in case this changes in the future.

Test Changes:
- Added more tests exercising inline asm (the core part of this change)
- fp16-promote.ll and pr47454.ll show improvements where unnecessary
  fp16-fp32 up/down-casts are no longer emitted. This results in fewer
  libcalls where those casts would be done with a libcall.
- aes-erratum-fix.ll is fairly noisy, and I need to revisit this test so
  that the IR is more minimal than it is right now, because most of the
  changes in this commit do not relate to what the AES erratum fix is
  actually trying to verify.

Differential Revision: https://reviews.llvm.org/D143711
2023-03-06 11:55:08 +00:00

4515 lines
170 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED
; These CPUs should have the fix enabled by default. They use different
; FileCheck prefixes because some instructions are scheduled differently.
;
; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
; This checks that adding `+fix-cortex-a57-aes-1742098` causes `vorr` to be
; inserted wherever the compiler cannot prove that either input to the first aes
; instruction in a fused aes pair was set by 64-bit Neon register writes or
; 128-bit Neon register writes. All other register writes are unsafe, and
; require a `vorr` to protect the AES input.
; AES intrinsics under test and external helpers (defined elsewhere) used to
; produce vector values whose defining write llc cannot see.
declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr
declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr
declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr
; One aese operand is zeroinitializer (materialized with vmov.i32) and the
; other is loaded with vld1.64, so no protective vorr is expected.
define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aese_zero:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %2 = load <16 x i8>, <16 x i8>* %0, align 8
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  store <16 x i8> %4, <16 x i8>* %0, align 8
  ret void
}
; The first aese operand is the return value of a call (arrives in q0, set by
; code llc cannot see), so a protective vorr on q0 is expected after the call.
define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aese_via_call1:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_input
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
  %3 = load <16 x i8>, <16 x i8>* %0, align 8
  %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
  store <16 x i8> %5, <16 x i8>* %0, align 8
  ret void
}
; Same as aese_via_call1 but the callee takes a half argument; the call's
; return in q0 still needs a protective vorr.
define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call2:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf16
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
  %4 = load <16 x i8>, <16 x i8>* %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}
; Same as aese_via_call1 but the callee takes a float argument; the call's
; return in q0 still needs a protective vorr.
define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call3:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf32
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
  %4 = load <16 x i8>, <16 x i8>* %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}
; Both aese operands are produced by vld1.64 (128-bit Neon writes), so no
; protective vorr is expected.
define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, <16 x i8>* %1, align 8
  %4 = load <16 x i8>, <16 x i8>* %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}
; Both aese operands arrive in argument registers q0/q1 (set by the caller, so
; unprovably safe); a protective vorr is expected for each.
define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q1
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  ret <16 x i8> %4
}
; Two fused aese/aesmc pairs; all inputs come from vld1.64 loads or the
; previous aesmc result, so no protective vorr is expected.
define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT: aese.8 q8, q9
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, <16 x i8>* %1, align 8
  %4 = load <16 x i8>, <16 x i8>* %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  %7 = load <16 x i8>, <16 x i8>* %0, align 8
  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
  store <16 x i8> %9, <16 x i8>* %1, align 8
  ret void
}
; Two fused pairs with both original operands in caller-set argument
; registers; protective vorrs are expected on q0/q1 (q0 is protected again
; before its reuse by the second aese).
define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q8
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  ret <16 x i8> %6
}
; aese inside a loop with both operands reloaded via vld1.64 every iteration,
; so no protective vorr is expected. Separate NOSCHED/CORTEX prefixes because
; the two pipelines schedule the subs differently.
define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bne .LBB8_1
; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bne .LBB8_1
; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %6
5:
  ret void
6:
  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
  %8 = load <16 x i8>, <16 x i8>* %2, align 8
  %9 = load <16 x i8>, <16 x i8>* %1, align 8
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  store <16 x i8> %11, <16 x i8>* %2, align 8
  %12 = add nuw i32 %7, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %6
}
; aese inside a loop with operands arriving in argument registers; the
; protective vorrs are expected once before the loop, not per iteration.
define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aese_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB9_2
; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q1, q1
; CHECK-FIX-NEXT: bne .LBB9_1
; CHECK-FIX-NEXT: .LBB9_2:
; CHECK-FIX-NEXT: vorr q0, q1, q1
; CHECK-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %7
5:
  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
  ret <16 x i8> %6
7:
  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  %12 = add nuw i32 %8, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %7
}
; An i8 loaded through a pointer is lane-inserted (vmov.8) into both aese
; operands; the argument register q0 gets a protective vorr, while q8 (freshly
; written by vld1.64) does not.
define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i8, i8* %0, align 1
  %5 = load <16 x i8>, <16 x i8>* %2, align 8
  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
  store <16 x i8> %9, <16 x i8>* %2, align 8
  ret void
}
; Same as aese_set8_via_ptr but the inserted i8 arrives by value in a GPR;
; only the argument register q0 needs the protective vorr.
define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set8_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = load <16 x i8>, <16 x i8>* %2, align 8
  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
  %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
  %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
  store <16 x i8> %8, <16 x i8>* %2, align 8
  ret void
}
; The i8 lane insert happens only on one side of a branch, so on some paths
; the operands keep their original (unproven) contents; the argument register
; q0 gets a protective vorr up front regardless of the path taken.
define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB12_3
; CHECK-FIX-NEXT: b .LBB12_4
; CHECK-FIX-NEXT: .LBB12_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_4
; CHECK-FIX-NEXT: .LBB12_3:
; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT: .LBB12_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %9
5:
  %6 = load i8, i8* %1, align 1
  %7 = load <16 x i8>, <16 x i8>* %3, align 8
  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
  br label %11
9:
  %10 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %11
11:
  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
  br i1 %0, label %13, label %16
13:
  %14 = load i8, i8* %1, align 1
  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
  br label %16
16:
  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  ret void
}
; Conditional (select-based) i8 lane insert into both operands; the argument
; register q0 gets a protective vorr up front.
define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: .LBB13_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: .LBB13_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load <16 x i8>, <16 x i8>* %3, align 8
  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  store <16 x i8> %11, <16 x i8>* %3, align 8
  ret void
}
; The i8 lane insert is loop-invariant and hoisted before the loop; the
; protective vorr on the argument register q0 is emitted once, up front.
define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB14_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB14_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB14_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i8, i8* %1, align 1
  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
  %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
  store i8 %5, i8* %7, align 8
  %8 = icmp eq i32 %0, 0
  br i1 %8, label %12, label %9
9:
  %10 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %13
11:
  store <16 x i8> %17, <16 x i8>* %3, align 8
  br label %12
12:
  ret void
13:
  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
  %16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
  %17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
  %18 = add nuw i32 %15, 1
  %19 = icmp eq i32 %18, %0
  br i1 %19, label %11, label %13
}
; The first aese operand gets an i8 lane insert every iteration, while the
; other operand's insert is loop-invariant; the protective vorr on q0 is
; emitted once before the loop.
define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB15_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB15_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB15_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %10, label %6
6:
  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
  %8 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %11
9:
  store <16 x i8> %16, <16 x i8>* %3, align 8
  br label %10
10:
  ret void
11:
  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
  %14 = insertelement <16 x i8> %12, i8 %1, i64 0
  %15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
  %16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
  %17 = add nuw i32 %13, 1
  %18 = icmp eq i32 %17, %0
  br i1 %18, label %9, label %11
}
; i16 variant of aese_set8_via_ptr: an i16 loaded through a pointer is
; lane-inserted (vmov.16) into both aese operands; only the argument register
; q0 gets a protective vorr.
define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i16, i16* %0, align 2
  %5 = bitcast <16 x i8>* %2 to <8 x i16>*
  %6 = load <8 x i16>, <8 x i16>* %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %4, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %4, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; i16 variant of aese_set8_via_val; only the argument register q0 needs the
; protective vorr.
define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <8 x i16>*
  %5 = load <8 x i16>, <8 x i16>* %4, align 8
  %6 = insertelement <8 x i16> %5, i16 %0, i64 0
  %7 = bitcast <8 x i16> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <8 x i16>
  %9 = insertelement <8 x i16> %8, i16 %0, i64 0
  %10 = bitcast <8 x i16> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; i16 variant of aese_set8_cond_via_ptr: conditional lane insert via branches;
; the argument register q0 gets a protective vorr up front on all paths.
define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB18_3
; CHECK-FIX-NEXT: b .LBB18_4
; CHECK-FIX-NEXT: .LBB18_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_4
; CHECK-FIX-NEXT: .LBB18_3:
; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT: .LBB18_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10
5:
  %6 = load i16, i16* %1, align 2
  %7 = bitcast <16 x i8>* %3 to <8 x i16>*
  %8 = load <8 x i16>, <8 x i16>* %7, align 8
  %9 = insertelement <8 x i16> %8, i16 %6, i64 0
  br label %13
10:
  %11 = bitcast <16 x i8>* %3 to <8 x i16>*
  %12 = load <8 x i16>, <8 x i16>* %11, align 8
  br label %13
13:
  %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19
15:
  %16 = load i16, i16* %1, align 2
  %17 = bitcast <16 x i8> %2 to <8 x i16>
  %18 = insertelement <8 x i16> %17, i16 %16, i64 0
  br label %21
19:
  %20 = bitcast <16 x i8> %2 to <8 x i16>
  br label %21
21:
  %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <8 x i16> %14 to <16 x i8>
  %24 = bitcast <8 x i16> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; i16 variant of aese_set8_cond_via_val: select-based conditional lane insert;
; the argument register q0 gets a protective vorr up front.
define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: .LBB19_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB19_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <8 x i16>*
  %6 = load <8 x i16>, <8 x i16>* %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %1, i64 0
  %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
  %9 = bitcast <16 x i8> %2 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %1, i64 0
  %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
  %12 = bitcast <8 x i16> %8 to <16 x i8>
  %13 = bitcast <8 x i16> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; i16 variant of aese_set8_loop_via_ptr: the loop-invariant lane insert is
; hoisted; the protective vorr on q0 is emitted once, before the loop.
define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB20_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB20_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i16, i16* %1, align 2
  %6 = bitcast <16 x i8> %2 to <8 x i16>
  %7 = insertelement <8 x i16> %6, i16 %5, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8>* %3 to i16*
  store i16 %5, i16* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11
11:
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15
13:
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14
14:
  ret void
15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; i16 variant of aese_set8_loop_via_val, but here the store of the scalar and
; the vector store stay inside the loop; the protective vorr on q0 is still
; emitted once before the loop.
define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB21_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB21_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6
6:
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = insertelement <8 x i16> %7, i16 %1, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <8 x i16>*
  %11 = bitcast <16 x i8>* %3 to i16*
  br label %13
12:
  ret void
13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <8 x i16>, <8 x i16>* %10, align 8
  %16 = insertelement <8 x i16> %15, i16 %1, i64 0
  %17 = bitcast <8 x i16> %16 to <16 x i8>
  store i16 %1, i16* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; i32 variant of aese_set8_via_ptr: an i32 loaded through a pointer is
; lane-inserted (vmov.32) into both aese operands; only the argument register
; q0 gets a protective vorr.
define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i32, i32* %0, align 4
  %5 = bitcast <16 x i8>* %2 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %4, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %4, i64 0
  %11 = bitcast <4 x i32> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; i32 variant of aese_set8_via_val; only the argument register q0 needs the
; protective vorr.
define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <4 x i32>*
  %5 = load <4 x i32>, <4 x i32>* %4, align 8
  %6 = insertelement <4 x i32> %5, i32 %0, i64 0
  %7 = bitcast <4 x i32> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <4 x i32>
  %9 = insertelement <4 x i32> %8, i32 %0, i64 0
  %10 = bitcast <4 x i32> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; i32 variant of aese_set8_cond_via_ptr: conditional lane insert via branches;
; the argument register q0 gets a protective vorr up front on all paths.
define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB24_3
; CHECK-FIX-NEXT: b .LBB24_4
; CHECK-FIX-NEXT: .LBB24_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_4
; CHECK-FIX-NEXT: .LBB24_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB24_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10
5:
  %6 = load i32, i32* %1, align 4
  %7 = bitcast <16 x i8>* %3 to <4 x i32>*
  %8 = load <4 x i32>, <4 x i32>* %7, align 8
  %9 = insertelement <4 x i32> %8, i32 %6, i64 0
  br label %13
10:
  %11 = bitcast <16 x i8>* %3 to <4 x i32>*
  %12 = load <4 x i32>, <4 x i32>* %11, align 8
  br label %13
13:
  %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19
15:
  %16 = load i32, i32* %1, align 4
  %17 = bitcast <16 x i8> %2 to <4 x i32>
  %18 = insertelement <4 x i32> %17, i32 %16, i64 0
  br label %21
19:
  %20 = bitcast <16 x i8> %2 to <4 x i32>
  br label %21
21:
  %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <4 x i32> %14 to <16 x i8>
  %24 = bitcast <4 x i32> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; i32 variant of aese_set8_cond_via_val: select-based conditional lane insert;
; the argument register q0 gets a protective vorr up front.
define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB25_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: .LBB25_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB25_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB25_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %1, i64 0
  %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
  %9 = bitcast <16 x i8> %2 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %1, i64 0
  %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
  %12 = bitcast <4 x i32> %8 to <16 x i8>
  %13 = bitcast <4 x i32> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; Loads an i32 through a pointer, inserts it into lane 0 of the key operand,
; also stores it through the result pointer, then runs the aese/aesmc pair in
; a counted loop (%0 iterations; early-exit when %0 == 0). Checks the vorr
; protection is hoisted out of the loop.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldr r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    str r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB26_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB26_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB26_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
  %5 = load i32, i32* %1, align 4
  %6 = bitcast <16 x i8> %2 to <4 x i32>
  %7 = insertelement <4 x i32> %6, i32 %5, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  ; Store the scalar through the output buffer before the loop.
  %9 = bitcast <16 x i8>* %3 to i32*
  store i32 %5, i32* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11
11:                                               ; loop preheader
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15
13:                                               ; loop exit: write back result
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14
14:                                               ; return block
  ret void
15:                                               ; loop body: aese+aesmc, %0 times
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; Same loop shape as aese_set32_loop_via_ptr, but the i32 comes in a register
; and the lane insert/store into the data operand happens on every iteration,
; so the expected loop re-loads, re-inserts and re-stores each time around.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB27_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:  .LBB27_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bne .LBB27_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6
6:                                                ; loop preheader: build key operand
  %7 = bitcast <16 x i8> %2 to <4 x i32>
  %8 = insertelement <4 x i32> %7, i32 %1, i64 0
  %9 = bitcast <4 x i32> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <4 x i32>*
  %11 = bitcast <16 x i8>* %3 to i32*
  br label %13
12:                                               ; return block
  ret void
13:                                               ; loop body: insert + aese/aesmc + store
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <4 x i32>, <4 x i32>* %10, align 8
  %16 = insertelement <4 x i32> %15, i32 %1, i64 0
  %17 = bitcast <4 x i32> %16 to <16 x i8>
  store i32 %1, i32* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; Unconditional i64 lane-0 insert into both aese operands, value loaded via a
; pointer. Two CHECK prefixes cover the no-schedule and Cortex scheduling
; models; the instruction sequences differ only in ordering.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vorr d16, d0, d0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vorr d16, d0, d0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
  %4 = load i64, i64* %0, align 8
  ; Insert into the in-memory data operand...
  %5 = bitcast <16 x i8>* %2 to <2 x i64>*
  %6 = load <2 x i64>, <2 x i64>* %5, align 8
  %7 = insertelement <2 x i64> %6, i64 %4, i64 0
  %8 = bitcast <2 x i64> %7 to <16 x i8>
  ; ...and into the register key operand.
  %9 = bitcast <16 x i8> %1 to <2 x i64>
  %10 = insertelement <2 x i64> %9, i64 %4, i64 0
  %11 = bitcast <2 x i64> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; Unconditional i64 lane-0 insert into both aese operands, value passed in
; registers (arrives as r0/r1 per AAPCS and is moved in with two vmov.32s per
; d-register in the expected output).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set64_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NEXT:    vmov.32 d0[1], r1
; CHECK-FIX-NEXT:    vmov.32 d16[1], r1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
  %4 = bitcast <16 x i8>* %2 to <2 x i64>*
  %5 = load <2 x i64>, <2 x i64>* %4, align 8
  %6 = insertelement <2 x i64> %5, i64 %0, i64 0
  %7 = bitcast <2 x i64> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <2 x i64>
  %9 = insertelement <2 x i64> %8, i64 %0, i64 0
  %10 = bitcast <2 x i64> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; Conditional (branchy, diamond-shaped CFG rather than selects) i64 insert via
; a pointer into both aese operands. Separate CHECK prefixes for the
; no-schedule and Cortex scheduling models.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB30_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    vldr d16, [r1]
; CHECK-FIX-NOSCHED-NEXT:    b .LBB30_3
; CHECK-FIX-NOSCHED-NEXT:  .LBB30_2:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB30_3:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB30_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vldr d18, [r1]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vorr d16, d18, d18
; CHECK-CORTEX-FIX-NEXT:    b .LBB30_3
; CHECK-CORTEX-FIX-NEXT:  .LBB30_2:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:  .LBB30_3:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vldrne d0, [r1]
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bx lr
  br i1 %0, label %5, label %10
5:                                                ; taken path: load + insert into data operand
  %6 = load i64, i64* %1, align 8
  %7 = bitcast <16 x i8>* %3 to <2 x i64>*
  %8 = load <2 x i64>, <2 x i64>* %7, align 8
  %9 = insertelement <2 x i64> %8, i64 %6, i64 0
  br label %13
10:                                               ; untaken path: plain load
  %11 = bitcast <16 x i8>* %3 to <2 x i64>*
  %12 = load <2 x i64>, <2 x i64>* %11, align 8
  br label %13
13:
  %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19
15:                                               ; taken path: insert into key operand
  %16 = load i64, i64* %1, align 8
  %17 = bitcast <16 x i8> %2 to <2 x i64>
  %18 = insertelement <2 x i64> %17, i64 %16, i64 0
  br label %21
19:                                               ; untaken path: key operand unchanged
  %20 = bitcast <16 x i8> %2 to <2 x i64>
  br label %21
21:
  %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <2 x i64> %14 to <16 x i8>
  %24 = bitcast <2 x i64> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; Conditional (via selects) i64 lane-0 insert into both aese operands with the
; value in registers; the pointer argument arrives on the stack (ldr r1, [sp]
; in the expected output) because r0-r3 are consumed by %0 and the i64 %1.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldr r1, [sp]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    beq .LBB31_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
; CHECK-FIX-NEXT:  .LBB31_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB31_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
; CHECK-FIX-NEXT:  .LBB31_4: @ %select.end1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
  %5 = bitcast <16 x i8>* %3 to <2 x i64>*
  %6 = load <2 x i64>, <2 x i64>* %5, align 8
  %7 = insertelement <2 x i64> %6, i64 %1, i64 0
  %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
  %9 = bitcast <16 x i8> %2 to <2 x i64>
  %10 = insertelement <2 x i64> %9, i64 %1, i64 0
  %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
  %12 = bitcast <2 x i64> %8 to <16 x i8>
  %13 = bitcast <2 x i64> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; i64 variant of the loop-via-ptr test: the value is loaded once, stored out,
; inserted into the key operand, and the aese/aesmc pair loops %0 times.
; The NOSCHED and Cortex expectations differ only in how the early-exit
; epilogue is formed (branch-around vs. predicated popeq).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    ldrd r4, r5, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    strd r4, r5, [r2]
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB32_4
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vmov d0, r4, r5
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB32_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB32_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB32_4:
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    ldrd r4, r5, [r1]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    strd r4, r5, [r2]
; CHECK-CORTEX-FIX-NEXT:    popeq {r4, r5, r11, pc}
; CHECK-CORTEX-FIX-NEXT:  .LBB32_1:
; CHECK-CORTEX-FIX-NEXT:    vmov d0, r4, r5
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:  .LBB32_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    bne .LBB32_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r11, pc}
  %5 = load i64, i64* %1, align 8
  %6 = bitcast <16 x i8> %2 to <2 x i64>
  %7 = insertelement <2 x i64> %6, i64 %5, i64 0
  %8 = bitcast <2 x i64> %7 to <16 x i8>
  ; Store the scalar through the output buffer before the loop.
  %9 = bitcast <16 x i8>* %3 to i64*
  store i64 %5, i64* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11
11:                                               ; loop preheader
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15
13:                                               ; loop exit: write back result
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14
14:                                               ; return block
  ret void
15:                                               ; loop body: aese+aesmc, %0 times
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; i64 variant of the loop-via-val test: the insert into the data operand and
; the i64 store repeat on every iteration, so the expected loop body re-loads
; and re-inserts (vmov.32 d16[0]/d16[1]) before each aese/aesmc.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB33_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
; CHECK-FIX-NEXT:    ldr r1, [sp]
; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
; CHECK-FIX-NEXT:  .LBB33_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bne .LBB33_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6
6:                                                ; loop preheader: build key operand
  %7 = bitcast <16 x i8> %2 to <2 x i64>
  %8 = insertelement <2 x i64> %7, i64 %1, i64 0
  %9 = bitcast <2 x i64> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <2 x i64>*
  %11 = bitcast <16 x i8>* %3 to i64*
  br label %13
12:                                               ; return block
  ret void
13:                                               ; loop body: insert + aese/aesmc + store
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <2 x i64>, <2 x i64>* %10, align 8
  %16 = insertelement <2 x i64> %15, i64 %1, i64 0
  %17 = bitcast <2 x i64> %16 to <16 x i8>
  store i64 %1, i64* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; 16-bit variant: a half is read through a pointer as an i16 and inserted into
; lane 0 of both operands as <8 x i16> lanes. Expected output uses ldrh +
; vmov.16, i.e. no fp16 instructions are required for the lane insert.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
  ; The half pointer is reinterpreted as i16 so the insert is purely integer.
  %4 = bitcast half* %0 to i16*
  %5 = load i16, i16* %4, align 2
  %6 = bitcast <16 x i8>* %2 to <8 x i16>*
  %7 = load <8 x i16>, <8 x i16>* %6, align 8
  %8 = insertelement <8 x i16> %7, i16 %5, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8> %1 to <8 x i16>
  %11 = insertelement <8 x i16> %10, i16 %5, i64 0
  %12 = bitcast <8 x i16> %11 to <16 x i8>
  %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
  %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
  store <16 x i8> %14, <16 x i8>* %2, align 8
  ret void
}
; Half passed by value in s0, bitcast to i16 and inserted into lane 0 of both
; operands. Expected output extracts it with "vmov r1, s0" and uses vmov.16
; lane moves - again no fp16 instruction support is needed.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf16_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vmov r1, s0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vmov.16 d2[0], r1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:    aese.8 q8, q1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
  %4 = bitcast <16 x i8>* %2 to <8 x i16>*
  %5 = load <8 x i16>, <8 x i16>* %4, align 8
  %6 = bitcast half %0 to i16
  %7 = insertelement <8 x i16> %5, i16 %6, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %6, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; Conditional half insert via a pointer, carried through <8 x half> phis. The
; large scalarized expansion in the CHECK lines (uxth/lsr/pkhbt element
; reassembly through GPRs) presumably reflects lowering of the <8 x half> phi
; without full fp16 vector support - TODO(review): confirm against the RUN
; line feature sets at the top of the file.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py -
; regenerate them rather than hand-editing.
define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    .pad #24
; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_3
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r3
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r5
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB36_4
; CHECK-FIX-NOSCHED-NEXT:  .LBB36_2:
; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r6, d1
; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r3, d0
; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r4, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r4
; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r3
; CHECK-FIX-NOSCHED-NEXT:    b .LBB36_5
; CHECK-FIX-NOSCHED-NEXT:  .LBB36_3:
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #14]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #12]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #8]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #6]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #10]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r10, [r2]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #4]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #2]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_2
; CHECK-FIX-NOSCHED-NEXT:  .LBB36_4:
; CHECK-FIX-NOSCHED-NEXT:    vmov r5, r3, d1
; CHECK-FIX-NOSCHED-NEXT:    mov r4, r7
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d0[1]
; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d0[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r5
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r3
; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r7
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    mov r7, r4
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:  .LBB36_5:
; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r6, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r9, r5, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    .pad #24
; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_3
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vorr q9, q8, q8
; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d18[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d18[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    vmov r3, r6, d17
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r11, r6
; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bne .LBB36_4
; CHECK-CORTEX-FIX-NEXT:  .LBB36_2:
; CHECK-CORTEX-FIX-NEXT:    vmov r1, r7, d0
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r7, #16
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    mov r0, r3
; CHECK-CORTEX-FIX-NEXT:    vmov r7, r3, d1
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r5, r7, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16
; CHECK-CORTEX-FIX-NEXT:    mov r3, r0
; CHECK-CORTEX-FIX-NEXT:    b .LBB36_5
; CHECK-CORTEX-FIX-NEXT:  .LBB36_3:
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2]
; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r2, #12]
; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #14]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #2]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #4]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #6]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #8]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #10]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_2
; CHECK-CORTEX-FIX-NEXT:  .LBB36_4:
; CHECK-CORTEX-FIX-NEXT:    vorr q8, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r5, d0[1]
; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r5
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r5, #16
; CHECK-CORTEX-FIX-NEXT:    vmov r5, r7, d1
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r1, d16[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r5
; CHECK-CORTEX-FIX-NEXT:    lsr r5, r5, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:  .LBB36_5:
; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r10, r5, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r1, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
  br i1 %0, label %5, label %12
5:                                                ; taken path: load i16 + insert, then view as <8 x half>
  %6 = bitcast half* %1 to i16*
  %7 = load i16, i16* %6, align 2
  %8 = bitcast <16 x i8>* %3 to <8 x i16>*
  %9 = load <8 x i16>, <8 x i16>* %8, align 8
  %10 = insertelement <8 x i16> %9, i16 %7, i64 0
  %11 = bitcast <8 x i16> %10 to <8 x half>
  br label %15
12:                                               ; untaken path: plain <8 x half> load
  %13 = bitcast <16 x i8>* %3 to <8 x half>*
  %14 = load <8 x half>, <8 x half>* %13, align 8
  br label %15
15:                                               ; data operand phi is of type <8 x half>
  %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
  br i1 %0, label %17, label %23
17:                                               ; taken path: insert into key operand
  %18 = bitcast half* %1 to i16*
  %19 = load i16, i16* %18, align 2
  %20 = bitcast <16 x i8> %2 to <8 x i16>
  %21 = insertelement <8 x i16> %20, i16 %19, i64 0
  %22 = bitcast <8 x i16> %21 to <8 x half>
  br label %25
23:                                               ; untaken path: key operand unchanged
  %24 = bitcast <16 x i8> %2 to <8 x half>
  br label %25
25:
  %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
  %27 = bitcast <8 x half> %16 to <16 x i8>
  %28 = bitcast <8 x half> %26 to <16 x i8>
  %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28)
  %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29)
  store <16 x i8> %30, <16 x i8>* %3, align 8
  ret void
}
define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: .pad #24
; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3
; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10]
; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4]
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1]
; CHECK-FIX-NOSCHED-NEXT: .LBB37_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_5
; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1]
; CHECK-FIX-NOSCHED-NEXT: mov r3, r2
; CHECK-FIX-NOSCHED-NEXT: mov r2, r7
; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0]
; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6
; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16
; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4
; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7
; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16
; CHECK-FIX-NOSCHED-NEXT: mov r7, r2
; CHECK-FIX-NOSCHED-NEXT: mov r2, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16
; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_6
; CHECK-FIX-NOSCHED-NEXT: .LBB37_5:
; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2
; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16
; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3
; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5
; CHECK-FIX-NOSCHED-NEXT: .LBB37_6:
; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: .pad #28
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28
; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0]
; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17
; CHECK-CORTEX-FIX-NEXT: uxth r6, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: uxth r11, r7
; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: b .LBB37_3
; CHECK-CORTEX-FIX-NEXT: .LBB37_2:
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12]
; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10]
; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB37_5
; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3
; CHECK-CORTEX-FIX-NEXT: uxth r10, r4
; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16
; CHECK-CORTEX-FIX-NEXT: uxth lr, r6
; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16
; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0]
; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: b .LBB37_6
; CHECK-CORTEX-FIX-NEXT: .LBB37_5:
; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2
; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: mov r0, r7
; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3
; CHECK-CORTEX-FIX-NEXT: uxth r10, r6
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT: mov r7, r0
; CHECK-CORTEX-FIX-NEXT: .LBB37_6:
; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
br i1 %0, label %5, label %11
5:
%6 = bitcast <16 x i8>* %3 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <8 x half>
br label %14
11:
%12 = bitcast <16 x i8>* %3 to <8 x half>*
%13 = load <8 x half>, <8 x half>* %12, align 8
br label %14
14:
%15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
br i1 %0, label %16, label %21
16:
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = bitcast half %1 to i16
%19 = insertelement <8 x i16> %17, i16 %18, i64 0
%20 = bitcast <8 x i16> %19 to <8 x half>
br label %23
21:
%22 = bitcast <16 x i8> %2 to <8 x half>
br label %23
23:
%24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
%25 = bitcast <8 x half> %15 to <16 x i8>
%26 = bitcast <8 x half> %24 to <16 x i8>
%27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
%28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
store <16 x i8> %28, <16 x i8>* %3, align 8
ret void
}
; Test: aese/aesmc in a counted loop where lane 0 of the key vector %2 is set
; from a half loaded through pointer %1. The half is loaded as i16 (no fp16
; instructions needed), inserted once before the loop, and the same i16 is
; stored to the output buffer %3. CHECK lines autogenerated by
; update_llc_test_checks.py; they pin the vmov.16 lane insert happening
; outside the .LBB38_2 loop body.
define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrh r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strh r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB38_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB38_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB38_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
  %5 = bitcast half* %1 to i16*
  %6 = load i16, i16* %5, align 2
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = insertelement <8 x i16> %7, i16 %6, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to i16*
  store i16 %6, i16* %10, align 8
  %11 = icmp eq i32 %0, 0
  br i1 %11, label %15, label %12
12:
  %13 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %16
14:
  store <16 x i8> %20, <16 x i8>* %3, align 8
  br label %15
15:
  ret void
16:
  %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
  %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
  %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
  %21 = add nuw i32 %18, 1
  %22 = icmp eq i32 %21, %0
  br i1 %22, label %14, label %16
}
; Test: aese/aesmc loop where a half passed by value (%1, bitcast to i16) is
; inserted into lane 0 of both the key vector %2 (once, before the loop) and
; the data vector reloaded from %3 on every iteration; the half is also
; stored through %3 each iteration. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB39_1:
; CHECK-FIX-NEXT:    vmov r2, s0
; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
; CHECK-FIX-NEXT:  .LBB39_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r2
; CHECK-FIX-NEXT:    aese.8 q8, q1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bne .LBB39_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %13, label %6
6:
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = bitcast half %1 to i16
  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
  %10 = bitcast <8 x i16> %9 to <16 x i8>
  %11 = bitcast <16 x i8>* %3 to <8 x i16>*
  %12 = bitcast <16 x i8>* %3 to half*
  br label %14
13:
  ret void
14:
  %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
  %16 = load <8 x i16>, <8 x i16>* %11, align 8
  %17 = insertelement <8 x i16> %16, i16 %8, i64 0
  %18 = bitcast <8 x i16> %17 to <16 x i8>
  store half %1, half* %12, align 8
  %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
  %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
  store <16 x i8> %20, <16 x i8>* %3, align 8
  %21 = add nuw i32 %15, 1
  %22 = icmp eq i32 %21, %0
  br i1 %22, label %13, label %14
}
; Test: aese/aesmc where a float loaded through pointer %0 is inserted into
; lane 0 of both the vector loaded from %2 and the by-value vector %1.
; CHECK lines autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vldr s0, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aese.8 q1, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q1
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
  %4 = load float, float* %0, align 4
  %5 = bitcast <16 x i8>* %2 to <4 x float>*
  %6 = load <4 x float>, <4 x float>* %5, align 8
  %7 = insertelement <4 x float> %6, float %4, i64 0
  %8 = bitcast <4 x float> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <4 x float>
  %10 = insertelement <4 x float> %9, float %4, i64 0
  %11 = bitcast <4 x float> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; Test: same as aese_setf32_via_ptr but the float arrives by value in %0
; (register s0) rather than through memory. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT:    vmov.f32 s0, s4
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    aese.8 q0, q1
; CHECK-FIX-NEXT:    aesmc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
  %4 = bitcast <16 x i8>* %2 to <4 x float>*
  %5 = load <4 x float>, <4 x float>* %4, align 8
  %6 = insertelement <4 x float> %5, float %0, i64 0
  %7 = bitcast <4 x float> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <4 x float>
  %9 = insertelement <4 x float> %8, float %0, i64 0
  %10 = bitcast <4 x float> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; Test: branch-based conditional variant — the float lane insert on each
; aese operand only happens on the %0-true paths (blocks 5 and 15); phis
; merge the modified/unmodified vectors before the aese/aesmc calls.
; CHECK lines autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB42_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB42_3
; CHECK-FIX-NEXT:    b .LBB42_4
; CHECK-FIX-NEXT:  .LBB42_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB42_4
; CHECK-FIX-NEXT:  .LBB42_3:
; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT:  .LBB42_4:
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
  br i1 %0, label %5, label %10
5:
  %6 = load float, float* %1, align 4
  %7 = bitcast <16 x i8>* %3 to <4 x float>*
  %8 = load <4 x float>, <4 x float>* %7, align 8
  %9 = insertelement <4 x float> %8, float %6, i64 0
  br label %13
10:
  %11 = bitcast <16 x i8>* %3 to <4 x float>*
  %12 = load <4 x float>, <4 x float>* %11, align 8
  br label %13
13:
  %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19
15:
  %16 = load float, float* %1, align 4
  %17 = bitcast <16 x i8> %2 to <4 x float>
  %18 = insertelement <4 x float> %17, float %16, i64 0
  br label %21
19:
  %20 = bitcast <16 x i8> %2 to <4 x float>
  br label %21
21:
  %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <4 x float> %14 to <16 x i8>
  %24 = bitcast <4 x float> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; Test: select-based conditional variant — both lane inserts are applied
; unconditionally and then chosen with selects on %0, which the backend
; lowers to predicated vmovne.f32 instructions (checked below for both the
; no-sched and Cortex scheduling models). CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
  %5 = bitcast <16 x i8>* %3 to <4 x float>*
  %6 = load <4 x float>, <4 x float>* %5, align 8
  %7 = insertelement <4 x float> %6, float %1, i64 0
  %8 = select i1 %0, <4 x float> %7, <4 x float> %6
  %9 = bitcast <16 x i8> %2 to <4 x float>
  %10 = insertelement <4 x float> %9, float %1, i64 0
  %11 = select i1 %0, <4 x float> %10, <4 x float> %9
  %12 = bitcast <4 x float> %8 to <16 x i8>
  %13 = bitcast <4 x float> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; Test: aese/aesmc loop with the float key lane loaded through pointer %1,
; inserted into %2 once before the loop, and also stored to %3. The two
; CHECK prefixes differ only in instruction scheduling. CHECK lines
; autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vldr s4, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vstr s4, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB44_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB44_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB44_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vldr s4, [r1]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vstr s4, [r2]
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB44_1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT:  .LBB44_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    bne .LBB44_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bx lr
  %5 = load float, float* %1, align 4
  %6 = bitcast <16 x i8> %2 to <4 x float>
  %7 = insertelement <4 x float> %6, float %5, i64 0
  %8 = bitcast <4 x float> %7 to <16 x i8>
  %9 = bitcast <16 x i8>* %3 to float*
  store float %5, float* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11
11:
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15
13:
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14
14:
  ret void
15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; Test: aese/aesmc loop with the float passed by value; the lane insert into
; the vector reloaded from %3 is repeated inside the loop, while the insert
; into %2 is hoisted out (.LBB45_1). CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB45_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:  .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB45_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB45_1:
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:  .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB45_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6
6:
  %7 = bitcast <16 x i8> %2 to <4 x float>
  %8 = insertelement <4 x float> %7, float %1, i64 0
  %9 = bitcast <4 x float> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <4 x float>*
  %11 = bitcast <16 x i8>* %3 to float*
  br label %13
12:
  ret void
13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <4 x float>, <4 x float>* %10, align 8
  %16 = insertelement <4 x float> %15, float %1, i64 0
  %17 = bitcast <4 x float> %16 to <16 x i8>
  store float %1, float* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; Test: aesd/aesimc with a zeroinitializer data operand (materialized with
; vmov.i32 #0x0) against a vector loaded from %0. First of the aesd-family
; tests mirroring the aese ones above. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aesd_zero:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vmov.i32 q9, #0x0
; CHECK-FIX-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
  %2 = load <16 x i8>, <16 x i8>* %0, align 8
  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
  store <16 x i8> %4, <16 x i8>* %0, align 8
  ret void
}
; Test: the first aesd operand is the return value of an external call
; (@get_input), arriving in q0 across the call boundary. CHECK lines
; autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aesd_via_call1:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_input
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aesd.8 q0, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
  %3 = load <16 x i8>, <16 x i8>* %0, align 8
  %4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
  store <16 x i8> %5, <16 x i8>* %0, align 8
  ret void
}
; Test: like aesd_via_call1, but the external callee (@get_inputf16) takes a
; half argument, exercising the f16 calling-convention path before aesd.
; CHECK lines autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call2:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_inputf16
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aesd.8 q0, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
  %4 = load <16 x i8>, <16 x i8>* %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}
; Test: like aesd_via_call2, but the external callee (@get_inputf32) takes a
; float argument. CHECK lines autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call3:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_inputf32
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aesd.8 q0, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
  %4 = load <16 x i8>, <16 x i8>* %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}
; Test: a single aesd/aesimc round where both operands are loaded from
; memory (%0 is the key, %1 is the in/out data buffer). CHECK lines
; autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
  %3 = load <16 x i8>, <16 x i8>* %1, align 8
  %4 = load <16 x i8>, <16 x i8>* %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}
; Test: a single aesd/aesimc round with both operands passed and returned by
; value in q registers; the vorr copies before aesd are what the erratum-fix
; CHECK lines pin down. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    aesimc.8 q0, q1
; CHECK-FIX-NEXT:    bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
  ret <16 x i8> %4
}
; Test: two chained aesd/aesimc rounds through memory — the first round's
; result is stored to %1 and fed (with a reload of the key from %0) into the
; second round. CHECK lines autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT:    aesd.8 q8, q9
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
  %3 = load <16 x i8>, <16 x i8>* %1, align 8
  %4 = load <16 x i8>, <16 x i8>* %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  %7 = load <16 x i8>, <16 x i8>* %0, align 8
  %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
  store <16 x i8> %9, <16 x i8>* %1, align 8
  ret void
}
; Test: two chained aesd/aesimc rounds entirely in registers, reusing %0 as
; the key for both rounds (note the duplicated vorr q0 copy in the expected
; output). CHECK lines autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q0, q8
; CHECK-FIX-NEXT:    bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  ret <16 x i8> %6
}
; Test: aesd/aesimc in a counted loop with both operands reloaded from
; memory every iteration and the result stored back to %2. The two CHECK
; prefixes differ only in scheduling. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB54_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB54_1
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB54_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB54_1
; CHECK-CORTEX-FIX-NEXT:  @ %bb.2:
; CHECK-CORTEX-FIX-NEXT:    bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %6
5:
  ret void
6:
  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
  %8 = load <16 x i8>, <16 x i8>* %2, align 8
  %9 = load <16 x i8>, <16 x i8>* %1, align 8
  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
  store <16 x i8> %11, <16 x i8>* %2, align 8
  %12 = add nuw i32 %7, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %6
}
; Test: aesd/aesimc in a counted loop carried entirely in registers — the
; round state is a phi in q1 and the key stays in q0, so the vorr copies
; happen once before the loop. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aesd_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB55_2
; CHECK-FIX-NEXT:  .LBB55_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesimc.8 q1, q1
; CHECK-FIX-NEXT:    bne .LBB55_1
; CHECK-FIX-NEXT:  .LBB55_2:
; CHECK-FIX-NEXT:    vorr q0, q1, q1
; CHECK-FIX-NEXT:    bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %7
5:
  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
  ret <16 x i8> %6
7:
  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
  %12 = add nuw i32 %8, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %7
}
; Test: aesd where an i8 loaded through pointer %0 is inserted into lane 0
; of both operands (vmov.8 into d0[0] and d16[0]); first of the aesd set8
; tests mirroring the aese ones. CHECK lines autogenerated by
; update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
  %4 = load i8, i8* %0, align 1
  %5 = load <16 x i8>, <16 x i8>* %2, align 8
  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
  %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
  store <16 x i8> %9, <16 x i8>* %2, align 8
  ret void
}
; Test: like aesd_set8_via_ptr but the i8 is passed by value (zeroext in
; r0), inserted into lane 0 of both aesd operands. CHECK lines
; autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set8_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.8 d0[0], r0
; CHECK-FIX-NEXT:    vmov.8 d16[0], r0
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
  %4 = load <16 x i8>, <16 x i8>* %2, align 8
  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
  %7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
  %8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
  store <16 x i8> %8, <16 x i8>* %2, align 8
  ret void
}
; Test: branch-based conditional i8 lane insert on both aesd operands,
; guarded twice by %0 (blocks 5/9 for the memory operand and 13/16 for the
; by-value operand), merged with phis before aesd/aesimc. CHECK lines
; autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB58_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB58_3
; CHECK-FIX-NEXT:    b .LBB58_4
; CHECK-FIX-NEXT:  .LBB58_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB58_4
; CHECK-FIX-NEXT:  .LBB58_3:
; CHECK-FIX-NEXT:    vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT:  .LBB58_4:
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
  br i1 %0, label %5, label %9
5:
  %6 = load i8, i8* %1, align 1
  %7 = load <16 x i8>, <16 x i8>* %3, align 8
  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
  br label %11
9:
  %10 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %11
11:
  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
  br i1 %0, label %13, label %16
13:
  %14 = load i8, i8* %1, align 1
  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
  br label %16
16:
  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  ret void
}
; Test: select-based conditional i8 lane insert on both aesd operands;
; the backend lowers each select to a compare-and-branch over a single
; vmov.8 (the select.end blocks in the expected output). CHECK lines
; autogenerated by update_llc_test_checks.py.
define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB59_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
; CHECK-FIX-NEXT:  .LBB59_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB59_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:  .LBB59_4: @ %select.end1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
  %5 = load <16 x i8>, <16 x i8>* %3, align 8
  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
  store <16 x i8> %11, <16 x i8>* %3, align 8
  ret void
}
; Loads a byte through %1, inserts it into lane 0 of %2 and also stores it to
; the first byte of the memory at %3, then runs aesd+aesimc %0 times over the
; vector at %3 (skipping the loop entirely when %0 == 0).
define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB60_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB60_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB60_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i8, i8* %1, align 1
  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
  ; Store the same byte to element 0 of the vector in memory.
  %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
  store i8 %5, i8* %7, align 8
  %8 = icmp eq i32 %0, 0
  br i1 %8, label %12, label %9

9:                                      ; loop preheader
  %10 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %13

11:                                     ; loop exit: store final value
  store <16 x i8> %17, <16 x i8>* %3, align 8
  br label %12

12:
  ret void

13:                                     ; loop body, %0 iterations
  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
  %16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
  %17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
  %18 = add nuw i32 %15, 1
  %19 = icmp eq i32 %18, %0
  br i1 %19, label %11, label %13
}
; Like aesd_set8_loop_via_ptr, but takes the byte by value and re-inserts it
; into lane 0 of the loop-carried vector on every iteration (so the in-loop
; `vmov.8 d16[0]` is expected inside the checked loop body).
define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB61_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB61_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB61_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %10, label %6

6:                                      ; loop preheader
  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
  %8 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %11

9:                                      ; loop exit: store final value
  store <16 x i8> %16, <16 x i8>* %3, align 8
  br label %10

10:
  ret void

11:                                     ; loop body, %0 iterations
  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
  %14 = insertelement <16 x i8> %12, i8 %1, i64 0
  %15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7)
  %16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15)
  %17 = add nuw i32 %13, 1
  %18 = icmp eq i32 %17, %0
  br i1 %18, label %9, label %11
}
; Loads an i16 through %0 and inserts it into lane 0 (viewed as <8 x i16>) of
; both aesd operands. Separate CHECK blocks cover the no-schedule and
; Cortex scheduling models, which differ only in instruction order.
define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i16, i16* %0, align 2
  ; Insert into the in-memory vector, reinterpreted as <8 x i16>.
  %5 = bitcast <16 x i8>* %2 to <8 x i16>*
  %6 = load <8 x i16>, <8 x i16>* %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %4, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  ; Insert into the argument vector, reinterpreted the same way.
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %4, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; Same as aesd_set16_via_ptr but the i16 arrives by value in r0, so no load
; is emitted and both scheduling models agree on one CHECK-FIX block.
define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <8 x i16>*
  %5 = load <8 x i16>, <8 x i16>* %4, align 8
  %6 = insertelement <8 x i16> %5, i16 %0, i64 0
  %7 = bitcast <8 x i16> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <8 x i16>
  %9 = insertelement <8 x i16> %8, i16 %0, i64 0
  %10 = bitcast <8 x i16> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; Guarded (branch-based, on %0) i16 lane-0 insert into both aesd operands,
; with the i16 loaded through %1 on each taken path. Uses explicit CFG
; diamonds plus phis rather than selects (contrast with *_cond_via_val).
define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB64_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB64_3
; CHECK-FIX-NEXT: b .LBB64_4
; CHECK-FIX-NEXT: .LBB64_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB64_4
; CHECK-FIX-NEXT: .LBB64_3:
; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT: .LBB64_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:                                      ; insert into the loaded vector
  %6 = load i16, i16* %1, align 2
  %7 = bitcast <16 x i8>* %3 to <8 x i16>*
  %8 = load <8 x i16>, <8 x i16>* %7, align 8
  %9 = insertelement <8 x i16> %8, i16 %6, i64 0
  br label %13

10:                                     ; load the vector unchanged
  %11 = bitcast <16 x i8>* %3 to <8 x i16>*
  %12 = load <8 x i16>, <8 x i16>* %11, align 8
  br label %13

13:
  %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:                                     ; insert into the argument vector
  %16 = load i16, i16* %1, align 2
  %17 = bitcast <16 x i8> %2 to <8 x i16>
  %18 = insertelement <8 x i16> %17, i16 %16, i64 0
  br label %21

19:                                     ; pass the argument vector unchanged
  %20 = bitcast <16 x i8> %2 to <8 x i16>
  br label %21

21:
  %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <8 x i16> %14 to <16 x i8>
  %24 = bitcast <8 x i16> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; Select-based conditional i16 lane-0 insert (value in r1) into both aesd
; operands; the selects are lowered to predicated `vmov.16` diamonds.
define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB65_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: .LBB65_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB65_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB65_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <8 x i16>*
  %6 = load <8 x i16>, <8 x i16>* %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %1, i64 0
  %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
  %9 = bitcast <16 x i8> %2 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %1, i64 0
  %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
  %12 = bitcast <8 x i16> %8 to <16 x i8>
  %13 = bitcast <8 x i16> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; Loads an i16 through %1, inserts it into lane 0 of %2 (viewed as <8 x i16>)
; and stores it to the first halfword at %3, then runs aesd+aesimc %0 times.
define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB66_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB66_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB66_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i16, i16* %1, align 2
  %6 = bitcast <16 x i8> %2 to <8 x i16>
  %7 = insertelement <8 x i16> %6, i16 %5, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  ; Store the same i16 to element 0 of the vector in memory.
  %9 = bitcast <16 x i8>* %3 to i16*
  store i16 %5, i16* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:                                     ; loop preheader
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15

13:                                     ; loop exit: store final value
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14

14:
  ret void

15:                                     ; loop body, %0 iterations
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; Loop variant where every iteration reloads the vector from %3, re-inserts
; the by-value i16 into lane 0, stores the i16 scalar, and stores the
; aesd+aesimc result back — so the load/insert/store all stay in the loop.
define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB67_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB67_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB67_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:                                      ; loop preheader
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = insertelement <8 x i16> %7, i16 %1, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <8 x i16>*
  %11 = bitcast <16 x i8>* %3 to i16*
  br label %13

12:
  ret void

13:                                     ; loop body, %0 iterations
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <8 x i16>, <8 x i16>* %10, align 8
  %16 = insertelement <8 x i16> %15, i16 %1, i64 0
  %17 = bitcast <8 x i16> %16 to <16 x i8>
  ; Scalar store of the lane value is dead after the vector store below,
  ; but keeps memory dependencies interesting for the scheduler.
  store i16 %1, i16* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; Loads an i32 through %0 and inserts it into lane 0 (viewed as <4 x i32>)
; of both aesd operands; per-scheduler CHECK blocks differ only in ordering.
define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i32, i32* %0, align 4
  %5 = bitcast <16 x i8>* %2 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %4, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %4, i64 0
  %11 = bitcast <4 x i32> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; i32-by-value variant: inserts r0 into lane 0 of both aesd operands.
define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <4 x i32>*
  %5 = load <4 x i32>, <4 x i32>* %4, align 8
  %6 = insertelement <4 x i32> %5, i32 %0, i64 0
  %7 = bitcast <4 x i32> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <4 x i32>
  %9 = insertelement <4 x i32> %8, i32 %0, i64 0
  %10 = bitcast <4 x i32> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; Branch-guarded i32 lane-0 insert (loaded through %1) into both aesd
; operands, using explicit CFG diamonds plus phis.
define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB70_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB70_3
; CHECK-FIX-NEXT: b .LBB70_4
; CHECK-FIX-NEXT: .LBB70_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB70_4
; CHECK-FIX-NEXT: .LBB70_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB70_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:                                      ; insert into the loaded vector
  %6 = load i32, i32* %1, align 4
  %7 = bitcast <16 x i8>* %3 to <4 x i32>*
  %8 = load <4 x i32>, <4 x i32>* %7, align 8
  %9 = insertelement <4 x i32> %8, i32 %6, i64 0
  br label %13

10:                                     ; load the vector unchanged
  %11 = bitcast <16 x i8>* %3 to <4 x i32>*
  %12 = load <4 x i32>, <4 x i32>* %11, align 8
  br label %13

13:
  %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:                                     ; insert into the argument vector
  %16 = load i32, i32* %1, align 4
  %17 = bitcast <16 x i8> %2 to <4 x i32>
  %18 = insertelement <4 x i32> %17, i32 %16, i64 0
  br label %21

19:                                     ; pass the argument vector unchanged
  %20 = bitcast <16 x i8> %2 to <4 x i32>
  br label %21

21:
  %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <4 x i32> %14 to <16 x i8>
  %24 = bitcast <4 x i32> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; Select-based conditional i32 lane-0 insert (value in r1) into both aesd
; operands; lowered to predicated `vmov.32` diamonds.
define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB71_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: .LBB71_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB71_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB71_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %1, i64 0
  %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
  %9 = bitcast <16 x i8> %2 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %1, i64 0
  %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
  %12 = bitcast <4 x i32> %8 to <16 x i8>
  %13 = bitcast <4 x i32> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; Loads an i32 through %1, inserts it into lane 0 of %2 (viewed as <4 x i32>)
; and stores it to the first word at %3, then runs aesd+aesimc %0 times.
define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: str r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB72_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB72_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB72_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i32, i32* %1, align 4
  %6 = bitcast <16 x i8> %2 to <4 x i32>
  %7 = insertelement <4 x i32> %6, i32 %5, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  ; Store the same i32 to element 0 of the vector in memory.
  %9 = bitcast <16 x i8>* %3 to i32*
  store i32 %5, i32* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:                                     ; loop preheader
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15

13:                                     ; loop exit: store final value
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14

14:
  ret void

15:                                     ; loop body, %0 iterations
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; Loop variant with by-value i32: every iteration reloads the vector from %3,
; re-inserts r1 into lane 0, stores the scalar, and stores the aesd+aesimc
; result back.
define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB73_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB73_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB73_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:                                      ; loop preheader
  %7 = bitcast <16 x i8> %2 to <4 x i32>
  %8 = insertelement <4 x i32> %7, i32 %1, i64 0
  %9 = bitcast <4 x i32> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <4 x i32>*
  %11 = bitcast <16 x i8>* %3 to i32*
  br label %13

12:
  ret void

13:                                     ; loop body, %0 iterations
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <4 x i32>, <4 x i32>* %10, align 8
  %16 = insertelement <4 x i32> %15, i32 %1, i64 0
  %17 = bitcast <4 x i32> %16 to <16 x i8>
  store i32 %1, i32* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; Loads an i64 through %0 and inserts it into lane 0 (viewed as <2 x i64>) of
; both aesd operands; the 64-bit insert lowers to a d-register vldr + vorr.
define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i64, i64* %0, align 8
  %5 = bitcast <16 x i8>* %2 to <2 x i64>*
  %6 = load <2 x i64>, <2 x i64>* %5, align 8
  %7 = insertelement <2 x i64> %6, i64 %4, i64 0
  %8 = bitcast <2 x i64> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <2 x i64>
  %10 = insertelement <2 x i64> %9, i64 %4, i64 0
  %11 = bitcast <2 x i64> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; i64-by-value variant: the 64-bit lane insert arrives split across r0/r1
; and lowers to paired 32-bit `vmov.32 d?[0]/d?[1]` writes on both operands.
define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set64_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: vmov.32 d0[1], r1
; CHECK-FIX-NEXT: vmov.32 d16[1], r1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <2 x i64>*
  %5 = load <2 x i64>, <2 x i64>* %4, align 8
  %6 = insertelement <2 x i64> %5, i64 %0, i64 0
  %7 = bitcast <2 x i64> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <2 x i64>
  %9 = insertelement <2 x i64> %8, i64 %0, i64 0
  %10 = bitcast <2 x i64> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}
; Branch-guarded i64 lane-0 insert (loaded through %1) into both aesd
; operands; the two scheduling models produce differently-shaped diamonds
; (NOSCHED reloads through d16, CORTEX goes via d18 + vorr).
define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3
; CHECK-FIX-NOSCHED-NEXT: .LBB76_2:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB76_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
; CHECK-CORTEX-FIX-NEXT: b .LBB76_3
; CHECK-CORTEX-FIX-NEXT: .LBB76_2:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: .LBB76_3:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:                                      ; insert into the loaded vector
  %6 = load i64, i64* %1, align 8
  %7 = bitcast <16 x i8>* %3 to <2 x i64>*
  %8 = load <2 x i64>, <2 x i64>* %7, align 8
  %9 = insertelement <2 x i64> %8, i64 %6, i64 0
  br label %13

10:                                     ; load the vector unchanged
  %11 = bitcast <16 x i8>* %3 to <2 x i64>*
  %12 = load <2 x i64>, <2 x i64>* %11, align 8
  br label %13

13:
  %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:                                     ; insert into the argument vector
  %16 = load i64, i64* %1, align 8
  %17 = bitcast <16 x i8> %2 to <2 x i64>
  %18 = insertelement <2 x i64> %17, i64 %16, i64 0
  br label %21

19:                                     ; pass the argument vector unchanged
  %20 = bitcast <16 x i8> %2 to <2 x i64>
  br label %21

21:
  %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <2 x i64> %14 to <16 x i8>
  %24 = bitcast <2 x i64> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}
; Select-based conditional i64 lane-0 insert; the i64 is split across r2/r3
; and the vector pointer is passed on the stack (hence `ldr r1, [sp]`).
define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: beq .LBB77_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r2
; CHECK-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-FIX-NEXT: .LBB77_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB77_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r2
; CHECK-FIX-NEXT: vmov.32 d0[1], r3
; CHECK-FIX-NEXT: .LBB77_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <2 x i64>*
  %6 = load <2 x i64>, <2 x i64>* %5, align 8
  %7 = insertelement <2 x i64> %6, i64 %1, i64 0
  %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
  %9 = bitcast <16 x i8> %2 to <2 x i64>
  %10 = insertelement <2 x i64> %9, i64 %1, i64 0
  %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
  %12 = bitcast <2 x i64> %8 to <16 x i8>
  %13 = bitcast <2 x i64> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}
; Loads an i64 through %1, inserts it into lane 0 of %2 (viewed as <2 x i64>)
; and stores it to the first doubleword at %3, then runs aesd+aesimc %0
; times. Needs callee-saved r4/r5 for the ldrd/strd pair, so both CHECK
; blocks include push/pop sequences.
define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
; CHECK-FIX-NOSCHED-NEXT: beq .LBB78_4
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: bne .LBB78_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB78_4:
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
; CHECK-CORTEX-FIX-NEXT: .LBB78_1:
; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: bne .LBB78_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
  %5 = load i64, i64* %1, align 8
  %6 = bitcast <16 x i8> %2 to <2 x i64>
  %7 = insertelement <2 x i64> %6, i64 %5, i64 0
  %8 = bitcast <2 x i64> %7 to <16 x i8>
  ; Store the same i64 to element 0 of the vector in memory.
  %9 = bitcast <16 x i8>* %3 to i64*
  store i64 %5, i64* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:                                     ; loop preheader
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15

13:                                     ; loop exit: store final value
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14

14:
  ret void

15:                                     ; loop body, %0 iterations
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}
; Loop variant with by-value i64 (split across r2/r3; vector pointer on the
; stack): each iteration reloads the vector, re-inserts the i64 into lane 0,
; stores the scalar, and stores the aesd+aesimc result back.
define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB79_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r2
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: vmov.32 d0[1], r3
; CHECK-FIX-NEXT: .LBB79_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.32 d16[0], r2
; CHECK-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bne .LBB79_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:                                      ; loop preheader
  %7 = bitcast <16 x i8> %2 to <2 x i64>
  %8 = insertelement <2 x i64> %7, i64 %1, i64 0
  %9 = bitcast <2 x i64> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <2 x i64>*
  %11 = bitcast <16 x i8>* %3 to i64*
  br label %13

12:
  ret void

13:                                     ; loop body, %0 iterations
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <2 x i64>, <2 x i64>* %10, align 8
  %16 = insertelement <2 x i64> %15, i64 %1, i64 0
  %17 = bitcast <2 x i64> %16 to <16 x i8>
  store i64 %1, i64* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; half (fp16) storage test: the half* is immediately bitcast to i16*, so
; codegen matches the i16 variant exactly — a plain ldrh + `vmov.16` lane
; inserts into both aesd operands; no fp16 instructions are required.
define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  ; Reinterpret the half pointer as i16 and proceed exactly as for i16.
  %4 = bitcast half* %0 to i16*
  %5 = load i16, i16* %4, align 2
  %6 = bitcast <16 x i8>* %2 to <8 x i16>*
  %7 = load <8 x i16>, <8 x i16>* %6, align 8
  %8 = insertelement <8 x i16> %7, i16 %5, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8> %1 to <8 x i16>
  %11 = insertelement <8 x i16> %10, i16 %5, i64 0
  %12 = bitcast <8 x i16> %11 to <16 x i8>
  %13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
  %14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
  store <16 x i8> %14, <16 x i8>* %2, align 8
  ret void
}
; half-by-value variant: the half arrives in s0, which shifts the vector
; argument to q1 (hence `vorr q1, q1, q1` and `vmov r1, s0` in the checks);
; the half is bitcast to i16 and inserted into lane 0 of both operands.
define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vmov r1, s0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.16 d2[0], r1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aesd.8 q8, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <8 x i16>*
  %5 = load <8 x i16>, <8 x i16>* %4, align 8
  ; Reinterpret the half's bits as i16 for the lane inserts.
  %6 = bitcast half %0 to i16
  %7 = insertelement <8 x i16> %5, i16 %6, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %6, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}
; Conditional variant: the f16 lane insert (through a pointer) happens only
; when %0 is true; otherwise the unmodified vectors are used. The phi of
; <8 x half> forces element-wise scalarization in the generated code (the
; long uxth/lsr/pkhbt sequences that rebuild the vectors from 16-bit halves),
; which is why this test is so noisy — see the HEAD commit message.
define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: .pad #24
; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4
; CHECK-FIX-NOSCHED-NEXT: .LBB82_2:
; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0
; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16
; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16
; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4
; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3
; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5
; CHECK-FIX-NOSCHED-NEXT: .LBB82_3:
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6]
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10]
; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2
; CHECK-FIX-NOSCHED-NEXT: .LBB82_4:
; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1
; CHECK-FIX-NOSCHED-NEXT: mov r4, r7
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1]
; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0]
; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5
; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3
; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7
; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16
; CHECK-FIX-NOSCHED-NEXT: mov r7, r4
; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT: .LBB82_5:
; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: .pad #24
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB82_3
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0]
; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: uxth r11, r6
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bne .LBB82_4
; CHECK-CORTEX-FIX-NEXT: .LBB82_2:
; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0
; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16
; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: mov r0, r3
; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1
; CHECK-CORTEX-FIX-NEXT: uxth r10, r7
; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16
; CHECK-CORTEX-FIX-NEXT: uxth lr, r3
; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
; CHECK-CORTEX-FIX-NEXT: mov r3, r0
; CHECK-CORTEX-FIX-NEXT: b .LBB82_5
; CHECK-CORTEX-FIX-NEXT: .LBB82_3:
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2]
; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12]
; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2
; CHECK-CORTEX-FIX-NEXT: .LBB82_4:
; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1]
; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT: uxth r6, r5
; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16
; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1
; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0]
; CHECK-CORTEX-FIX-NEXT: uxth r10, r5
; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16
; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: .LBB82_5:
; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; IR: two guarded insert paths (one for the memory operand, one for the
; argument), merged via <8 x half> phis, then aesd + aesimc on the results.
br i1 %0, label %5, label %12
5:
%6 = bitcast half* %1 to i16*
%7 = load i16, i16* %6, align 2
%8 = bitcast <16 x i8>* %3 to <8 x i16>*
%9 = load <8 x i16>, <8 x i16>* %8, align 8
%10 = insertelement <8 x i16> %9, i16 %7, i64 0
%11 = bitcast <8 x i16> %10 to <8 x half>
br label %15
12:
%13 = bitcast <16 x i8>* %3 to <8 x half>*
%14 = load <8 x half>, <8 x half>* %13, align 8
br label %15
15:
%16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
br i1 %0, label %17, label %23
17:
%18 = bitcast half* %1 to i16*
%19 = load i16, i16* %18, align 2
%20 = bitcast <16 x i8> %2 to <8 x i16>
%21 = insertelement <8 x i16> %20, i16 %19, i64 0
%22 = bitcast <8 x i16> %21 to <8 x half>
br label %25
23:
%24 = bitcast <16 x i8> %2 to <8 x half>
br label %25
25:
%26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
%27 = bitcast <8 x half> %16 to <16 x i8>
%28 = bitcast <8 x half> %26 to <16 x i8>
%29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
%30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
store <16 x i8> %30, <16 x i8>* %3, align 8
ret void
}
; Conditional variant with the f16 passed by value (in s0): lane 0 of each
; vector is replaced only when %0 is true, merged through <8 x half> phis.
; As in aesd_setf16_cond_via_ptr, the <8 x half> phis scalarize into long
; uxth/lsr/pkhbt rebuild sequences in the checked output.
define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: .pad #24
; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: b .LBB83_3
; CHECK-FIX-NOSCHED-NEXT: .LBB83_2:
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10]
; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4]
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1]
; CHECK-FIX-NOSCHED-NEXT: .LBB83_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_5
; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1]
; CHECK-FIX-NOSCHED-NEXT: mov r3, r2
; CHECK-FIX-NOSCHED-NEXT: mov r2, r7
; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0]
; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6
; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16
; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4
; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7
; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16
; CHECK-FIX-NOSCHED-NEXT: mov r7, r2
; CHECK-FIX-NOSCHED-NEXT: mov r2, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16
; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT: b .LBB83_6
; CHECK-FIX-NOSCHED-NEXT: .LBB83_5:
; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2
; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16
; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3
; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5
; CHECK-FIX-NOSCHED-NEXT: .LBB83_6:
; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: .pad #28
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28
; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0]
; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17
; CHECK-CORTEX-FIX-NEXT: uxth r6, r3
; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT: uxth r11, r7
; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: b .LBB83_3
; CHECK-CORTEX-FIX-NEXT: .LBB83_2:
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12]
; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10]
; CHECK-CORTEX-FIX-NEXT: .LBB83_3:
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB83_5
; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3
; CHECK-CORTEX-FIX-NEXT: uxth r10, r4
; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16
; CHECK-CORTEX-FIX-NEXT: uxth lr, r6
; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16
; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0]
; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: b .LBB83_6
; CHECK-CORTEX-FIX-NEXT: .LBB83_5:
; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2
; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: mov r0, r7
; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3
; CHECK-CORTEX-FIX-NEXT: uxth r10, r6
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT: mov r7, r0
; CHECK-CORTEX-FIX-NEXT: .LBB83_6:
; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; IR: guarded lane inserts for the memory operand and the argument, merged
; through <8 x half> phis, then aesd + aesimc and store.
br i1 %0, label %5, label %11
5:
%6 = bitcast <16 x i8>* %3 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <8 x half>
br label %14
11:
%12 = bitcast <16 x i8>* %3 to <8 x half>*
%13 = load <8 x half>, <8 x half>* %12, align 8
br label %14
14:
%15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
br i1 %0, label %16, label %21
16:
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = bitcast half %1 to i16
%19 = insertelement <8 x i16> %17, i16 %18, i64 0
%20 = bitcast <8 x i16> %19 to <8 x half>
br label %23
21:
%22 = bitcast <16 x i8> %2 to <8 x half>
br label %23
23:
%24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
%25 = bitcast <8 x half> %15 to <16 x i8>
%26 = bitcast <8 x half> %24 to <16 x i8>
%27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
%28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
store <16 x i8> %28, <16 x i8>* %3, align 8
ret void
}
; Loop variant: insert the f16 (loaded via pointer) into the key vector once,
; also store the raw i16 to the output buffer, then run %0 iterations of
; aesd + aesimc. Verifies the vorr q0 erratum workaround is hoisted out of
; the loop (it appears once in .LBB84_1, not inside .LBB84_2).
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB84_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB84_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB84_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
; IR: build the key vector once, store the i16, then loop %0 times applying
; aesd/aesimc to the accumulator before the final store.
%5 = bitcast half* %1 to i16*
%6 = load i16, i16* %5, align 2
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to i16*
store i16 %6, i16* %10, align 8
%11 = icmp eq i32 %0, 0
br i1 %11, label %15, label %12
12:
%13 = load <16 x i8>, <16 x i8>* %3, align 8
br label %16
14:
store <16 x i8> %20, <16 x i8>* %3, align 8
br label %15
15:
ret void
16:
%17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
%18 = phi i32 [ 0, %12 ], [ %21, %16 ]
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
%21 = add nuw i32 %18, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %14, label %16
}
; Loop variant with the f16 by value: each iteration reloads the buffer,
; re-inserts the lane, stores the half, and applies aesd + aesimc. Here the
; load/insert stays inside the loop (.LBB85_2) because the buffer is
; rewritten every iteration.
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB85_1:
; CHECK-FIX-NEXT: vmov r2, s0
; CHECK-FIX-NEXT: vmov.16 d2[0], r2
; CHECK-FIX-NEXT: .LBB85_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r2
; CHECK-FIX-NEXT: aesd.8 q8, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bne .LBB85_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
; IR: key vector is built once outside the loop; the data vector is reloaded
; and re-inserted each iteration, and the half is stored to the buffer too.
%5 = icmp eq i32 %0, 0
br i1 %5, label %13, label %6
6:
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = bitcast <16 x i8>* %3 to half*
br label %14
13:
ret void
14:
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
%16 = load <8 x i16>, <8 x i16>* %11, align 8
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
%18 = bitcast <8 x i16> %17 to <16 x i8>
store half %1, half* %12, align 8
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
store <16 x i8> %20, <16 x i8>* %3, align 8
%21 = add nuw i32 %15, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %13, label %14
}
; f32 counterpart of aesd_setf16_via_ptr: insert a float (loaded via pointer)
; into lane 0 of both vectors, then aesd/aesimc. The single-precision insert
; is done with vmov.f32 on s registers; the vorr instructions before aesd
; are the erratum workaround being checked.
define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vldr s0, [r0]
; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
; IR: load the float, insert into lane 0 of both <4 x float> views, then
; aesd + aesimc and store back.
%4 = load float, float* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %4, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x float>
%10 = insertelement <4 x float> %9, float %4, i64 0
%11 = bitcast <4 x float> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
; f32 counterpart of aesd_setf16_via_val: the float arrives by value in s0
; and is inserted into lane 0 of both vectors before aesd/aesimc.
define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aesd.8 q0, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
; IR: insert the float argument into lane 0 of both <4 x float> views, then
; aesd + aesimc and store back.
%4 = bitcast <16 x i8>* %2 to <4 x float>*
%5 = load <4 x float>, <4 x float>* %4, align 8
%6 = insertelement <4 x float> %5, float %0, i64 0
%7 = bitcast <4 x float> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x float>
%9 = insertelement <4 x float> %8, float %0, i64 0
%10 = bitcast <4 x float> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
; Conditional f32 (via pointer) variant. Unlike the f16 conditional tests,
; the <4 x float> phis stay in vector registers, so the output is compact:
; conditional vld1.32 lane loads instead of full scalarization.
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB88_3
; CHECK-FIX-NEXT: b .LBB88_4
; CHECK-FIX-NEXT: .LBB88_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_4
; CHECK-FIX-NEXT: .LBB88_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB88_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
; IR: guarded float-lane inserts for the memory operand and the argument,
; merged with <4 x float> phis, then aesd + aesimc and store.
br i1 %0, label %5, label %10
5:
%6 = load float, float* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x float>*
%8 = load <4 x float>, <4 x float>* %7, align 8
%9 = insertelement <4 x float> %8, float %6, i64 0
br label %13
10:
%11 = bitcast <16 x i8>* %3 to <4 x float>*
%12 = load <4 x float>, <4 x float>* %11, align 8
br label %13
13:
%14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19
15:
%16 = load float, float* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x float>
%18 = insertelement <4 x float> %17, float %16, i64 0
br label %21
19:
%20 = bitcast <16 x i8> %2 to <4 x float>
br label %21
21:
%22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x float> %14 to <16 x i8>
%24 = bitcast <4 x float> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
; Conditional f32 (by value) variant: the IR uses selects rather than
; branches/phis, which lower to predicated vmovne.f32 lane moves. The two
; scheduling models differ only in instruction ordering, hence the separate
; NOSCHED and CORTEX prefixes.
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
; IR: select between the inserted and original vectors (instead of phis),
; then aesd + aesimc and store.
%5 = bitcast <16 x i8>* %3 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %1, i64 0
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
%9 = bitcast <16 x i8> %2 to <4 x float>
%10 = insertelement <4 x float> %9, float %1, i64 0
%11 = select i1 %0, <4 x float> %10, <4 x float> %9
%12 = bitcast <4 x float> %8 to <16 x i8>
%13 = bitcast <4 x float> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
; Loop f32 (via pointer) variant: the key vector is built once, the raw float
; is stored to the buffer, then %0 iterations of aesd + aesimc run on the
; accumulator. Note the vorr q0 erratum workaround is emitted INSIDE the
; loop here (before each aesd), unlike the f16 loop test.
define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB90_1:
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: bne .LBB90_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB90_1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: bne .LBB90_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
; IR: same loop structure as aesd_setf16_loop_via_ptr, with a float lane
; insert and a float store to the buffer before the loop.
%5 = load float, float* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x float>
%7 = insertelement <4 x float> %6, float %5, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to float*
store float %5, float* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11
11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15
13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14
14:
ret void
15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
; Test: like aesd_setf32_loop_via_ptr, but the float arrives *by value* in s0
; and the lane-0 insert is repeated on the loaded state every iteration.
; The CHECK lines verify the erratum workaround: both AESD operands (q2 and q1)
; are rewritten in full by "vorr qN, qN, qN" right before "aesd.8", because
; each was last written by a partial (32-bit vmov.f32) update.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py —
; regenerate rather than hand-edit them.
define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB91_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:  .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB91_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB91_1:
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:  .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB91_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    bx lr
  ; Early exit when the trip count %0 is zero.
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6
; Preheader: build the loop-invariant key (%2 with %1 in lane 0) and the
; pointer views of the state buffer used inside the loop.
6:
  %7 = bitcast <16 x i8> %2 to <4 x float>
  %8 = insertelement <4 x float> %7, float %1, i64 0
  %9 = bitcast <4 x float> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <4 x float>*
  %11 = bitcast <16 x i8>* %3 to float*
  br label %13
12:
  ret void
; Loop body: reload the state, overwrite lane 0 with %1 (both as a vector
; insert and as a scalar store to the buffer), then
; state = aesimc(aesd(state, key)), stored back each iteration.
13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <4 x float>, <4 x float>* %10, align 8
  %16 = insertelement <4 x float> %15, float %1, i64 0
  %17 = bitcast <4 x float> %16 to <16 x i8>
  store float %1, float* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}
; Test: the AESE data operand is a <16 x i8> constant (bytes 0..15), which is
; materialized from a constant island (.LCPI92_0). The CHECK lines verify the
; pool data is emitted 16-byte aligned (.p2align 4) and loaded with a 128-bit
; aligned vld1.64 [r1:128]; since the whole q-register comes from one full
; 128-bit load, no extra vorr workaround is needed before aese.8 here.
; NOTE(review): CHECK lines are autogenerated by update_llc_test_checks.py —
; regenerate rather than hand-edit them.
define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NOSCHED-NEXT:    adr r1, .LCPI92_0
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
; CHECK-FIX-NOSCHED-NEXT:    .p2align 4
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:  .LCPI92_0:
; CHECK-FIX-NOSCHED-NEXT:    .byte 0 @ 0x0
; CHECK-FIX-NOSCHED-NEXT:    .byte 1 @ 0x1
; CHECK-FIX-NOSCHED-NEXT:    .byte 2 @ 0x2
; CHECK-FIX-NOSCHED-NEXT:    .byte 3 @ 0x3
; CHECK-FIX-NOSCHED-NEXT:    .byte 4 @ 0x4
; CHECK-FIX-NOSCHED-NEXT:    .byte 5 @ 0x5
; CHECK-FIX-NOSCHED-NEXT:    .byte 6 @ 0x6
; CHECK-FIX-NOSCHED-NEXT:    .byte 7 @ 0x7
; CHECK-FIX-NOSCHED-NEXT:    .byte 8 @ 0x8
; CHECK-FIX-NOSCHED-NEXT:    .byte 9 @ 0x9
; CHECK-FIX-NOSCHED-NEXT:    .byte 10 @ 0xa
; CHECK-FIX-NOSCHED-NEXT:    .byte 11 @ 0xb
; CHECK-FIX-NOSCHED-NEXT:    .byte 12 @ 0xc
; CHECK-FIX-NOSCHED-NEXT:    .byte 13 @ 0xd
; CHECK-FIX-NOSCHED-NEXT:    .byte 14 @ 0xe
; CHECK-FIX-NOSCHED-NEXT:    .byte 15 @ 0xf
;
; CHECK-CORTEX-FIX-LABEL: aese_constantisland:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    adr r1, .LCPI92_0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; CHECK-CORTEX-FIX-NEXT:    .p2align 4
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:  .LCPI92_0:
; CHECK-CORTEX-FIX-NEXT:    .byte 0 @ 0x0
; CHECK-CORTEX-FIX-NEXT:    .byte 1 @ 0x1
; CHECK-CORTEX-FIX-NEXT:    .byte 2 @ 0x2
; CHECK-CORTEX-FIX-NEXT:    .byte 3 @ 0x3
; CHECK-CORTEX-FIX-NEXT:    .byte 4 @ 0x4
; CHECK-CORTEX-FIX-NEXT:    .byte 5 @ 0x5
; CHECK-CORTEX-FIX-NEXT:    .byte 6 @ 0x6
; CHECK-CORTEX-FIX-NEXT:    .byte 7 @ 0x7
; CHECK-CORTEX-FIX-NEXT:    .byte 8 @ 0x8
; CHECK-CORTEX-FIX-NEXT:    .byte 9 @ 0x9
; CHECK-CORTEX-FIX-NEXT:    .byte 10 @ 0xa
; CHECK-CORTEX-FIX-NEXT:    .byte 11 @ 0xb
; CHECK-CORTEX-FIX-NEXT:    .byte 12 @ 0xc
; CHECK-CORTEX-FIX-NEXT:    .byte 13 @ 0xd
; CHECK-CORTEX-FIX-NEXT:    .byte 14 @ 0xe
; CHECK-CORTEX-FIX-NEXT:    .byte 15 @ 0xf
  ; *%0 = aesmc(aese(<0..15>, *%0)) — constant first operand forces the
  ; constant-pool (constant island) materialization checked above.
  %2 = load <16 x i8>, <16 x i8>* %0, align 8
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  store <16 x i8> %4, <16 x i8>* %0, align 8
  ret void
}