; Files
; clang-p2996/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
; sgokhale c4a60c9d34 [CodeGen][ShrinkWrap] Enable PostShrinkWrap by default
; This is an attempt to reland D42600 and enable this optimisation by default.
;
; This also resolves the issue pointed out in the context of the PGO build.
;
; Differential Revision: https://reviews.llvm.org/D42600
; 2023-05-25 13:56:29 +05:30
;
; 940 lines
; 41 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --arm-memtransfer-tploop=allow -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
; Check that WLSTP loop is not generated for alignment < 4
; void test1(char* dest, char* src, int n){
; memcpy(dest, src, n);
; }
; Declarations of the memory intrinsics called by the functions in this file:
; memcpy with an i32 length, memcpy with an i64 length (called by test5), and
; memset with an i32 length.
declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
; test1: memcpy of a runtime length %n where both pointers are only 1-byte
; aligned. The expected output is a plain call to __aeabi_memcpy, with no
; wlstp/letp loop.
define void @test1(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n){
; CHECK-LABEL: test1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl __aeabi_memcpy
; CHECK-NEXT: pop {r7, pc}
entry:
; Both operands are marked align 1, i.e. below the alignment of 4 that this
; case is meant to exercise.
call void @llvm.memcpy.p0.p0.i32(ptr align 1 %X, ptr align 1 %Y, i32 %n, i1 false)
ret void
}
; Check that a WLSTP loop is generated for alignment >= 4.
; C equivalent:
; void test2(int* restrict X, int* restrict Y, int n){
; memcpy(X, Y, n);
; }
; Expected output: a wlstp.8/letp loop counting on r2 that copies through q0
; with vldrb.u8/vstrb.8, post-incrementing r1 and r0 by 16, and no libcall.
define void @test2(ptr noalias %X, ptr noalias readonly %Y, i32 %n){
; CHECK-LABEL: test2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: wlstp.8 lr, r2, .LBB1_2
; CHECK-NEXT: .LBB1_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB1_1
; CHECK-NEXT: .LBB1_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
; Same call as test1 except that both operands are align 4.
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
ret void
}
; Checks that the transform handles some arithmetic on the input arguments.
; C equivalent:
; void test3(int* restrict X, int* restrict Y, int n)
; {
; memcpy(X+2, Y+3, (n*2)+10);
; }
; In the expected output the length is formed with movs/add.w (10 + (n << 1))
; and the pointers are advanced by 8 and 12 bytes before the wlstp.8 loop.
define void @test3(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
; CHECK-LABEL: test3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: movs r3, #10
; CHECK-NEXT: add.w r2, r3, r2, lsl #1
; CHECK-NEXT: adds r1, #12
; CHECK-NEXT: adds r0, #8
; CHECK-NEXT: wlstp.8 lr, r2, .LBB2_2
; CHECK-NEXT: .LBB2_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB2_1
; CHECK-NEXT: .LBB2_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
; X + 2 and Y + 3 in units of i32, matching the +8 / +12 byte adjustments in
; the expected output.
%add.ptr = getelementptr inbounds i32, ptr %X, i32 2
%add.ptr1 = getelementptr inbounds i32, ptr %Y, i32 3
; Length operand: (n * 2) + 10.
%mul = shl nsw i32 %n, 1
%add = add nsw i32 %mul, 10
call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 %add.ptr, ptr nonnull align 4 %add.ptr1, i32 %add, i1 false)
ret void
}
; Checks that the transform handles for-loops that are implicitly converted to memcpy.
; C equivalent:
; void test4(int* restrict X, int* restrict Y, int n){
; for(int i = 0; i < n; ++i){
; X[i] = Y[i];
; }
; }
; The IR below is the already-converted form: the copy is a single memcpy in
; %for.body.preheader, guarded by the n > 0 test in %entry. The expected output
; returns early (bxlt lr) when n < 1 and only saves {r7, lr} on the path that
; runs the wlstp.8 loop.
define void @test4(ptr noalias %X, ptr noalias readonly %Y, i32 %n) {
; CHECK-LABEL: test4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB3_1: @ %for.body.preheader
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: wlstp.8 lr, r2, .LBB3_3
; CHECK-NEXT: .LBB3_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB3_2
; CHECK-NEXT: .LBB3_3: @ %for.body.preheader
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: bx lr
entry:
; Skip the copy entirely unless n > 0.
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body.preheader, %entry
ret void
}
; Checks that the transform can handle length operands wider than i32.
; The length %n is an i64 passed to the i64 flavour of llvm.memcpy; the
; expected wlstp.8 still takes its count from the single register r2.
; NOTE(review): presumably r2 holds the low 32 bits of %n under the target's
; argument assignment - confirm.
; This function has no named entry block, so the block comments in the expected
; output ("@ %bb.0:" and ".LBB4_2:") carry no IR block name.
define void @test5(ptr noalias %X, ptr noalias %Y, i64 %n){
; CHECK-LABEL: test5:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: wlstp.8 lr, r2, .LBB4_2
; CHECK-NEXT: .LBB4_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB4_1
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: pop {r7, pc}
call void @llvm.memcpy.p0.p0.i64(ptr align 4 %X, ptr align 4 %Y, i64 %n, i1 false)
ret void
}
; Checks the transform is applied for constant size inputs below a certain threshold (128 in this case)
; The copy length is the constant 127, which the expected output materialises
; with movs r2, #127 before the wlstp.8 loop. The %n argument is unused.
define void @test6(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
; CHECK-LABEL: test6:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: movs r2, #127
; CHECK-NEXT: wlstp.8 lr, r2, .LBB5_2
; CHECK-NEXT: .LBB5_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB5_1
; CHECK-NEXT: .LBB5_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 4 dereferenceable(127) %X, ptr noundef nonnull align 4 dereferenceable(127) %Y, i32 127, i1 false)
ret void
}
; Checks the transform is NOT applied for constant size inputs at or above a certain threshold (128 in this case)
; With a constant length of exactly 128 the expected output is a call to
; __aeabi_memcpy4 instead of a wlstp.8 loop. The %n argument is unused.
define void @test7(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
; CHECK-LABEL: test7:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: movs r2, #128
; CHECK-NEXT: bl __aeabi_memcpy4
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 128, i1 false)
ret void
}
; Checks the transform is NOT applied for constant size inputs below a certain threshold (64 in this case)
; The 60-byte copy is expected to be expanded inline as three ldm/stm pairs of
; five 32-bit registers each (3 * 5 * 4 = 60 bytes), with no loop and no
; libcall. The %n argument is unused.
define void @test8(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) {
; CHECK-LABEL: test8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldm.w r1!, {r2, r3, r4, r12, lr}
; CHECK-NEXT: stm.w r0!, {r2, r3, r4, r12, lr}
; CHECK-NEXT: ldm.w r1!, {r2, r3, r4, r12, lr}
; CHECK-NEXT: stm.w r0!, {r2, r3, r4, r12, lr}
; CHECK-NEXT: ldm.w r1, {r2, r3, r4, r12, lr}
; CHECK-NEXT: stm.w r0, {r2, r3, r4, r12, lr}
; CHECK-NEXT: pop {r4, pc}
entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 60, i1 false)
ret void
}
; Checks the transform is NOT applied (regardless of alignment) when optimizations are disabled
; The function carries attribute group #0, whose definition is outside this
; part of the file. NOTE(review): presumably #0 is noinline optnone - confirm.
; Expected output: a call to __aeabi_memcpy4 even though both pointers are
; align 4 and the length is a runtime value.
define void @test9(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) #0 {
; CHECK-LABEL: test9:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl __aeabi_memcpy4
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
ret void
}
; Checks the transform is NOT applied (regardless of alignment) when optimization for size is on (-Os or -Oz)
; The function carries attribute group #1, whose definition is outside this
; part of the file. NOTE(review): presumably #1 is a size attribute such as
; minsize or optsize - confirm.
; Expected output: a call to __aeabi_memcpy4 even though both pointers are
; align 4 and the length is a runtime value.
define void @test10(ptr noalias nocapture %X, ptr noalias nocapture readonly %Y, i32 %n) #1 {
; CHECK-LABEL: test10:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: bl __aeabi_memcpy4
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
ret void
}
; test11: a memcpy in %prehead followed by a scalar byte loop (%for.body) that
; starts again from the original %x and %y. In the expected output the wlstp.8
; loop works on copies of the pointers (r4 and r12), so r0 and r1 still hold
; their incoming values when the ldrb/strb loop that follows starts.
define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) {
; CHECK-LABEL: test11:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp.w r2, #-1
; CHECK-NEXT: it gt
; CHECK-NEXT: bxgt lr
; CHECK-NEXT: .LBB10_1: @ %prehead
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov r12, r1
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3
; CHECK-NEXT: .LBB10_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r12], #16
; CHECK-NEXT: vstrb.8 q0, [r4], #16
; CHECK-NEXT: letp lr, .LBB10_2
; CHECK-NEXT: .LBB10_3: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrb r3, [r0], #1
; CHECK-NEXT: subs r2, #2
; CHECK-NEXT: strb r3, [r1], #1
; CHECK-NEXT: bne .LBB10_3
; CHECK-NEXT: @ %bb.4:
; CHECK-NEXT: pop.w {r4, lr}
; CHECK-NEXT: bx lr
entry:
; The memcpy and the loop are only reached when n < 0 (signed compare).
%cmp6 = icmp slt i32 %n, 0
br i1 %cmp6, label %prehead, label %for.cond.cleanup
prehead: ; preds = %entry
; memcpy writes to %x and reads from %y.
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %x, ptr align 4 %y, i32 %n, i1 false)
br label %for.body
for.body: ; preds = %for.body, %prehead
%i.09 = phi i32 [ %inc, %for.body ], [ 0, %prehead ]
%x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %prehead ]
%y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %prehead ]
%add.ptr = getelementptr inbounds i8, ptr %x.addr.08, i32 1
%add.ptr1 = getelementptr inbounds i8, ptr %y.addr.07, i32 1
; The byte loop goes the other way round: it loads from %x and stores to %y.
%l = load i8, ptr %x.addr.08, align 1
store i8 %l, ptr %y.addr.07, align 1
; The pointers step by 1 byte per iteration while the counter steps by 2.
%inc = add nuw nsw i32 %i.09, 2
%exitcond.not = icmp eq i32 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
ret void
}
; Check that WLSTP loop is generated for simplest case of align = 1
; memset of %n bytes to a 1-byte aligned destination. The expected output
; fills q0 from the byte argument (vdup.8 q0, r1) and stores it with vstrb.8,
; post-incrementing r0 by 16, inside a wlstp.8/letp loop counting on r2.
define void @test12(ptr %X, i8 zeroext %c, i32 %n) {
; CHECK-LABEL: test12:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vdup.8 q0, r1
; CHECK-NEXT: wlstp.8 lr, r2, .LBB11_2
; CHECK-NEXT: .LBB11_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB11_1
; CHECK-NEXT: .LBB11_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memset.p0.i32(ptr align 1 %X, i8 %c, i32 %n, i1 false)
ret void
}
; Check that WLSTP loop is generated for alignment >= 4
; memset of %n bytes to a 4-byte aligned destination. The expected instruction
; sequence is the same vdup.8 + wlstp.8/vstrb.8/letp shape as for the align 1
; case in test12.
define void @test13(ptr %X, i8 zeroext %c, i32 %n) {
; CHECK-LABEL: test13:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vdup.8 q0, r1
; CHECK-NEXT: wlstp.8 lr, r2, .LBB12_2
; CHECK-NEXT: .LBB12_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB12_1
; CHECK-NEXT: .LBB12_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memset.p0.i32(ptr align 4 %X, i8 %c, i32 %n, i1 false)
ret void
}
; twoloops: two back-to-back zeroing memsets of the same pointer with the same
; length. The expected output contains two separate wlstp.8/letp loops that
; share one zero vector in q0; the first loop advances a copy of the pointer
; (r3) so that the second loop can start again from r0.
; Both calls use %m as the length; %n is unused.
define void @twoloops(ptr %X, i32 %n, i32 %m) {
; CHECK-LABEL: twoloops:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: mov r3, r0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB13_2
; CHECK-NEXT: .LBB13_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r3], #16
; CHECK-NEXT: letp lr, .LBB13_1
; CHECK-NEXT: .LBB13_2: @ %entry
; CHECK-NEXT: wlstp.8 lr, r2, .LBB13_4
; CHECK-NEXT: .LBB13_3: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB13_3
; CHECK-NEXT: .LBB13_4: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memset.p0.i32(ptr align 4 %X, i8 0, i32 %m, i1 false)
call void @llvm.memset.p0.i32(ptr align 4 %X, i8 0, i32 %m, i1 false)
ret void
}
; Checks that the transform correctly handles input with some arithmetic on input arguments.
; C equivalent:
; void test14(int* X, char c, int n)
; {
; memset(X+2, c, (n*2)+10);
; }
; In the expected output the length is formed with movs/add.w (10 + (n << 1))
; and the destination is advanced by 8 bytes before the wlstp.8 loop.
define void @test14(ptr %X, i8 zeroext %c, i32 %n) {
; CHECK-LABEL: test14:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: movs r3, #10
; CHECK-NEXT: add.w r2, r3, r2, lsl #1
; CHECK-NEXT: vdup.8 q0, r1
; CHECK-NEXT: adds r0, #8
; CHECK-NEXT: wlstp.8 lr, r2, .LBB14_2
; CHECK-NEXT: .LBB14_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB14_1
; CHECK-NEXT: .LBB14_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
; X + 2 in units of i32, matching the +8 byte adjustment in the expected output.
%add.ptr = getelementptr inbounds i32, ptr %X, i32 2
; Length operand: (n * 2) + 10.
%mul = shl nsw i32 %n, 1
%add = add nsw i32 %mul, 10
call void @llvm.memset.p0.i32(ptr nonnull align 4 %add.ptr, i8 %c, i32 %add, i1 false)
ret void
}
; Checks that the transform handles for-loops (that get implicitly converted to memset)
; C equivalent:
; void test15(int* X, char c, int n){
; for(int i = 0; i < n; ++i){
; X[i] = c;
; }
; }
; The IR below is the already-converted form: a single memset in
; %for.body.preheader, guarded by the n > 0 test in %entry. The expected output
; returns early (bxlt lr) when n < 1 and only saves {r7, lr} on the path that
; runs the wlstp.8 loop.
define void @test15(ptr nocapture %X, i8 zeroext %c, i32 %n) {
; CHECK-LABEL: test15:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB15_1: @ %for.body.preheader
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vdup.8 q0, r1
; CHECK-NEXT: wlstp.8 lr, r2, .LBB15_3
; CHECK-NEXT: .LBB15_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB15_2
; CHECK-NEXT: .LBB15_3: @ %for.body.preheader
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: bx lr
entry:
; Skip the memset entirely unless n > 0.
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
call void @llvm.memset.p0.i32(ptr align 4 %X, i8 %c, i32 %n, i1 false)
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body.preheader, %entry
ret void
}
; Checks that the transform handles the case with 0 as the value to store. No difference is expected.
; The fill value is the constant 0 rather than %c (which is unused), so the
; expected output builds the vector with vmov.i32 q0, #0x0 instead of a vdup.8
; from a register; the wlstp.8/vstrb.8/letp loop itself is unchanged.
define void @test16(ptr %X, i8 zeroext %c, i32 %n) {
; CHECK-LABEL: test16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB16_2
; CHECK-NEXT: .LBB16_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB16_1
; CHECK-NEXT: .LBB16_2: @ %entry
; CHECK-NEXT: pop {r7, pc}
entry:
call void @llvm.memset.p0.i32(ptr align 4 %X, i8 0, i32 %n, i1 false)
ret void
}
; csprlive: a compare whose result is computed before the memcpy and consumed
; by a branch after it. Both branch targets only call @other and join at
; %cleanup, and the expected output contains no compare or conditional branch:
; just the wlstp.8 copy loop followed by a single "bl other".
; NOTE(review): the name suggests this guards against the flags register being
; treated as live across the generated loop - confirm against the original
; commit.
define void @csprlive(ptr noalias %X, ptr noalias readonly %Y, i32 %n) {
; CHECK-LABEL: csprlive:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: wlstp.8 lr, r2, .LBB17_2
; CHECK-NEXT: .LBB17_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrb.u8 q0, [r1], #16
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB17_1
; CHECK-NEXT: .LBB17_2: @ %entry
; CHECK-NEXT: bl other
; CHECK-NEXT: pop {r7, pc}
entry:
; The compare is placed before the memcpy; its only use is the branch after it.
%cmp6 = icmp sgt i32 %n, 0
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %X, ptr align 4 %Y, i32 %n, i1 false)
br i1 %cmp6, label %if, label %else
if:
call void @other()
br label %cleanup
else:
call void @other()
br label %cleanup
cleanup:
ret void
}
; External function called from both branch targets of @csprlive.
declare void @other()
; External byte array whose elements are the memset destination in
; @multilooped_exit.
@arr_56 = external dso_local local_unnamed_addr global [21 x [16 x [11 x i8]]], align 1
; multilooped_exit: four identical zeroing memsets of @arr_56 with the runtime
; length %b, placed inside a loop driven by the counter %p. In the expected
; output the first three memsets become wlstp.8/letp loops, while the fourth is
; emitted as an explicit zero test of a precomputed value (r12 = (b + 15) >> 4,
; cmp/beq) followed by a dlstp.8/letp loop.
define void @multilooped_exit(i32 %b) {
; CHECK-LABEL: multilooped_exit:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB18_1: @ %loop.preheader
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w r4, #-1
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: b .LBB18_3
; CHECK-NEXT: .LBB18_2: @ %loop
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: cmp.w r4, #1024
; CHECK-NEXT: bge .LBB18_12
; CHECK-NEXT: .LBB18_3: @ %loop
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB18_4 Depth 2
; CHECK-NEXT: @ Child Loop BB18_6 Depth 2
; CHECK-NEXT: @ Child Loop BB18_8 Depth 2
; CHECK-NEXT: @ Child Loop BB18_11 Depth 2
; CHECK-NEXT: movw r3, :lower16:arr_56
; CHECK-NEXT: add.w r1, r0, #15
; CHECK-NEXT: movt r3, :upper16:arr_56
; CHECK-NEXT: lsr.w r12, r1, #4
; CHECK-NEXT: mov r2, r3
; CHECK-NEXT: wlstp.8 lr, r0, .LBB18_5
; CHECK-NEXT: .LBB18_4: @ Parent Loop BB18_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB18_4
; CHECK-NEXT: .LBB18_5: @ %loop
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
; CHECK-NEXT: mov r2, r3
; CHECK-NEXT: wlstp.8 lr, r0, .LBB18_7
; CHECK-NEXT: .LBB18_6: @ Parent Loop BB18_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB18_6
; CHECK-NEXT: .LBB18_7: @ %loop
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
; CHECK-NEXT: mov r2, r3
; CHECK-NEXT: wlstp.8 lr, r0, .LBB18_9
; CHECK-NEXT: .LBB18_8: @ Parent Loop BB18_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q0, [r2], #16
; CHECK-NEXT: letp lr, .LBB18_8
; CHECK-NEXT: .LBB18_9: @ %loop
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: beq .LBB18_2
; CHECK-NEXT: @ %bb.10: @ %loop
; CHECK-NEXT: @ in Loop: Header=BB18_3 Depth=1
; CHECK-NEXT: dlstp.8 lr, r0
; CHECK-NEXT: .LBB18_11: @ Parent Loop BB18_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vstrb.8 q0, [r3], #16
; CHECK-NEXT: letp lr, .LBB18_11
; CHECK-NEXT: b .LBB18_2
; CHECK-NEXT: .LBB18_12:
; CHECK-NEXT: pop.w {r4, lr}
; CHECK-NEXT: bx lr
entry:
; The loop is entered only when b > 0.
%cmp8 = icmp sgt i32 %b, 0
br i1 %cmp8, label %loop, label %exit
loop:
%p = phi i32 [ 0, %entry ], [ %inc, %loop ]
; Four memsets with identical operands; the destination address uses an undef
; index into @arr_56.
call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
call void @llvm.memset.p0.i32(ptr align 1 getelementptr ([21 x [16 x [11 x i8]]], ptr @arr_56, i32 0, i32 0, i32 undef, i32 0), i8 0, i32 %b, i1 false)
%inc = add i32 %p, 1
; The exit test reads %p (the value before the increment), not %inc.
%c = icmp slt i32 %p, 1024
br i1 %c, label %loop, label %exit
exit:
ret void
}
; External arrays referenced by @reverted: @arr_22 is the memset destination,
; while @arr_21 (i16 elements) and @arr_20 (i64 elements) receive the vector
; and scalar stores in its loops.
@arr_21 = external dso_local local_unnamed_addr global [17 x [12 x [19 x i16]]], align 2
@arr_20 = external dso_local local_unnamed_addr global [17 x [12 x [19 x i64]]], align 8
@arr_22 = external dso_local local_unnamed_addr global [17 x [12 x [19 x i16]]], align 2
define i32 @reverted(i1 zeroext %b) {
; CHECK-LABEL: reverted:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #12
; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov.w r1, #11
; CHECK-NEXT: cinc r1, r1, ne
; CHECK-NEXT: movs r0, #38
; CHECK-NEXT: mul r2, r1, r0
; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: movw r0, :lower16:arr_22
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: movt r0, :upper16:arr_22
; CHECK-NEXT: add.w r1, r2, #15
; CHECK-NEXT: lsrs r3, r1, #4
; CHECK-NEXT: strd r3, r2, [sp] @ 8-byte Folded Spill
; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_2
; CHECK-NEXT: .LBB19_1: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q0, [r0], #16
; CHECK-NEXT: letp lr, .LBB19_1
; CHECK-NEXT: .LBB19_2: @ %entry
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movw r6, :lower16:arr_20
; CHECK-NEXT: movt r6, :upper16:arr_20
; CHECK-NEXT: add.w r3, r6, #80
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: movw r0, :lower16:arr_21
; CHECK-NEXT: movt r0, :upper16:arr_21
; CHECK-NEXT: add.w r5, r0, #36
; CHECK-NEXT: add.w r11, r6, #128
; CHECK-NEXT: add.w r7, r6, #112
; CHECK-NEXT: add.w r2, r6, #96
; CHECK-NEXT: add.w r4, r6, #64
; CHECK-NEXT: add.w r0, r6, #48
; CHECK-NEXT: add.w r1, r6, #32
; CHECK-NEXT: add.w r12, r6, #16
; CHECK-NEXT: adr r6, .LCPI19_0
; CHECK-NEXT: vldrw.u32 q0, [r6]
; CHECK-NEXT: movw r6, :lower16:arr_20
; CHECK-NEXT: mov.w r8, #327685
; CHECK-NEXT: mov.w r9, #5
; CHECK-NEXT: vmov.i16 q1, #0x5
; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: movt r6, :upper16:arr_20
; CHECK-NEXT: .LBB19_3: @ %for.cond8.preheader
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str r8, [r5, #-4]
; CHECK-NEXT: vstrh.16 q1, [r5, #-36]
; CHECK-NEXT: strh.w r9, [r5]
; CHECK-NEXT: vstrh.16 q1, [r5, #-20]
; CHECK-NEXT: vstrw.32 q0, [r3]
; CHECK-NEXT: vstrh.16 q0, [r12], #152
; CHECK-NEXT: vstrh.16 q0, [r6], #152
; CHECK-NEXT: vstrh.16 q0, [r1], #152
; CHECK-NEXT: vstrh.16 q0, [r0], #152
; CHECK-NEXT: vstrh.16 q0, [r4], #152
; CHECK-NEXT: vstrh.16 q0, [r2], #152
; CHECK-NEXT: vstrh.16 q0, [r7], #152
; CHECK-NEXT: vstrh.16 q0, [r11], #152
; CHECK-NEXT: strd r9, r10, [r3, #64]
; CHECK-NEXT: adds r5, #38
; CHECK-NEXT: adds r3, #152
; CHECK-NEXT: le lr, .LBB19_3
; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup6
; CHECK-NEXT: movw r0, :lower16:arr_22
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: movt r0, :upper16:arr_22
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-NEXT: add.w r0, r0, #1824
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_6
; CHECK-NEXT: .LBB19_5: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q1, [r0], #16
; CHECK-NEXT: letp lr, .LBB19_5
; CHECK-NEXT: .LBB19_6: @ %for.cond.cleanup6
; CHECK-NEXT: movw r6, :lower16:arr_20
; CHECK-NEXT: movw r0, #7376
; CHECK-NEXT: movt r6, :upper16:arr_20
; CHECK-NEXT: adds r3, r6, r0
; CHECK-NEXT: movw r0, #7408
; CHECK-NEXT: add.w r12, r6, r0
; CHECK-NEXT: movw r0, #7344
; CHECK-NEXT: add.w r9, r6, r0
; CHECK-NEXT: movw r0, #7312
; CHECK-NEXT: adds r2, r6, r0
; CHECK-NEXT: movw r0, :lower16:arr_21
; CHECK-NEXT: add.w r1, r6, #7424
; CHECK-NEXT: add.w r7, r6, #7392
; CHECK-NEXT: add.w r4, r6, #7360
; CHECK-NEXT: add.w r5, r6, #7328
; CHECK-NEXT: add.w r8, r6, #7296
; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movt r0, :upper16:arr_21
; CHECK-NEXT: addw r0, r0, #1860
; CHECK-NEXT: mov.w r10, #5
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: mov.w r6, #327685
; CHECK-NEXT: vmov.i16 q1, #0x5
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: .LBB19_7: @ %for.cond8.preheader.1
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str r6, [r0, #-4]
; CHECK-NEXT: vstrh.16 q1, [r0, #-36]
; CHECK-NEXT: strh.w r10, [r0]
; CHECK-NEXT: vstrh.16 q1, [r0, #-20]
; CHECK-NEXT: vstrw.32 q0, [r3]
; CHECK-NEXT: vstrh.16 q0, [r2], #152
; CHECK-NEXT: vstrh.16 q0, [r8], #152
; CHECK-NEXT: vstrh.16 q0, [r5], #152
; CHECK-NEXT: vstrh.16 q0, [r9], #152
; CHECK-NEXT: vstrh.16 q0, [r4], #152
; CHECK-NEXT: vstrh.16 q0, [r7], #152
; CHECK-NEXT: vstrh.16 q0, [r12], #152
; CHECK-NEXT: vstrh.16 q0, [r1], #152
; CHECK-NEXT: strd r10, r11, [r3, #64]
; CHECK-NEXT: adds r0, #38
; CHECK-NEXT: adds r3, #152
; CHECK-NEXT: le lr, .LBB19_7
; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup6.1
; CHECK-NEXT: movw r0, :lower16:arr_22
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: movt r0, :upper16:arr_22
; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload
; CHECK-NEXT: add.w r0, r0, #3648
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: wlstp.8 lr, r2, .LBB19_10
; CHECK-NEXT: .LBB19_9: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q1, [r0], #16
; CHECK-NEXT: letp lr, .LBB19_9
; CHECK-NEXT: .LBB19_10: @ %for.cond.cleanup6.1
; CHECK-NEXT: movw r7, :lower16:arr_20
; CHECK-NEXT: movw r0, #14672
; CHECK-NEXT: movt r7, :upper16:arr_20
; CHECK-NEXT: adds r3, r7, r0
; CHECK-NEXT: movw r0, #14704
; CHECK-NEXT: add.w r12, r7, r0
; CHECK-NEXT: movw r0, #14688
; CHECK-NEXT: add.w r8, r7, r0
; CHECK-NEXT: movw r0, #14640
; CHECK-NEXT: add.w r9, r7, r0
; CHECK-NEXT: movw r0, #14624
; CHECK-NEXT: adds r2, r7, r0
; CHECK-NEXT: movw r0, #14608
; CHECK-NEXT: movw r1, :lower16:arr_21
; CHECK-NEXT: add r0, r7
; CHECK-NEXT: add.w r4, r7, #14720
; CHECK-NEXT: add.w r5, r7, #14656
; CHECK-NEXT: add.w r6, r7, #14592
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: movt r1, :upper16:arr_21
; CHECK-NEXT: addw r1, r1, #3684
; CHECK-NEXT: mov.w r10, #5
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: mov.w r7, #327685
; CHECK-NEXT: vmov.i16 q1, #0x5
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: .LBB19_11: @ %for.cond8.preheader.2
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str r7, [r1, #-4]
; CHECK-NEXT: vstrh.16 q1, [r1, #-36]
; CHECK-NEXT: strh.w r10, [r1]
; CHECK-NEXT: vstrh.16 q1, [r1, #-20]
; CHECK-NEXT: vstrw.32 q0, [r3]
; CHECK-NEXT: vstrh.16 q0, [r0], #152
; CHECK-NEXT: vstrh.16 q0, [r6], #152
; CHECK-NEXT: vstrh.16 q0, [r2], #152
; CHECK-NEXT: vstrh.16 q0, [r9], #152
; CHECK-NEXT: vstrh.16 q0, [r5], #152
; CHECK-NEXT: vstrh.16 q0, [r8], #152
; CHECK-NEXT: vstrh.16 q0, [r12], #152
; CHECK-NEXT: vstrh.16 q0, [r4], #152
; CHECK-NEXT: strd r10, r11, [r3, #64]
; CHECK-NEXT: adds r1, #38
; CHECK-NEXT: adds r3, #152
; CHECK-NEXT: le lr, .LBB19_11
; CHECK-NEXT: @ %bb.12: @ %for.cond.cleanup6.2
; CHECK-NEXT: movw r0, :lower16:arr_22
; CHECK-NEXT: ldrd r2, r1, [sp] @ 8-byte Folded Reload
; CHECK-NEXT: movt r0, :upper16:arr_22
; CHECK-NEXT: vmov.i32 q1, #0x0
; CHECK-NEXT: add.w r0, r0, #5472
; CHECK-NEXT: wlstp.8 lr, r1, .LBB19_14
; CHECK-NEXT: .LBB19_13: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vstrb.8 q1, [r0], #16
; CHECK-NEXT: letp lr, .LBB19_13
; CHECK-NEXT: .LBB19_14: @ %for.cond.cleanup6.2
; CHECK-NEXT: movw r2, :lower16:arr_21
; CHECK-NEXT: movw r1, #5508
; CHECK-NEXT: movt r2, :upper16:arr_21
; CHECK-NEXT: movw r7, :lower16:arr_20
; CHECK-NEXT: add r2, r1
; CHECK-NEXT: movw r1, #22000
; CHECK-NEXT: movt r7, :upper16:arr_20
; CHECK-NEXT: add.w r12, r7, r1
; CHECK-NEXT: movw r1, #21984
; CHECK-NEXT: add.w r8, r7, r1
; CHECK-NEXT: movw r1, #21952
; CHECK-NEXT: add.w r9, r7, r1
; CHECK-NEXT: movw r1, #21936
; CHECK-NEXT: movw r0, #21968
; CHECK-NEXT: adds r5, r7, r1
; CHECK-NEXT: movw r1, #21920
; CHECK-NEXT: movw r3, #21904
; CHECK-NEXT: adds r4, r7, r3
; CHECK-NEXT: add r0, r7
; CHECK-NEXT: add r1, r7
; CHECK-NEXT: add.w r3, r7, #22016
; CHECK-NEXT: add.w r6, r7, #21888
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: mov.w r10, #5
; CHECK-NEXT: vmov.i16 q1, #0x5
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: dls lr, r7
; CHECK-NEXT: mov.w r7, #327685
; CHECK-NEXT: .LBB19_15: @ %for.cond8.preheader.3
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str r7, [r2, #-4]
; CHECK-NEXT: vstrh.16 q1, [r2, #-36]
; CHECK-NEXT: strh.w r10, [r2]
; CHECK-NEXT: vstrh.16 q1, [r2, #-20]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r4], #152
; CHECK-NEXT: vstrh.16 q0, [r6], #152
; CHECK-NEXT: vstrh.16 q0, [r1], #152
; CHECK-NEXT: vstrh.16 q0, [r5], #152
; CHECK-NEXT: vstrh.16 q0, [r9], #152
; CHECK-NEXT: vstrh.16 q0, [r8], #152
; CHECK-NEXT: vstrh.16 q0, [r12], #152
; CHECK-NEXT: vstrh.16 q0, [r3], #152
; CHECK-NEXT: strd r10, r11, [r0, #64]
; CHECK-NEXT: adds r2, #38
; CHECK-NEXT: adds r0, #152
; CHECK-NEXT: le lr, .LBB19_15
; CHECK-NEXT: @ %bb.16: @ %for.cond.cleanup6.3
; CHECK-NEXT: add sp, #12
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.17:
; CHECK-NEXT: .LCPI19_0:
; CHECK-NEXT: .long 5 @ 0x5
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 5 @ 0x5
; CHECK-NEXT: .long 0 @ 0x0
entry:
%add = select i1 %b, i32 12, i32 11
%0 = mul nuw nsw i32 %add, 38
call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) @arr_22, i8 0, i32 %0, i1 false)
br label %for.cond8.preheader
for.cond8.preheader: ; preds = %entry, %for.cond8.preheader
%d.051 = phi i32 [ 0, %entry ], [ %inc, %for.cond8.preheader ]
%arrayidx16 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 0
%arrayidx21 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 0
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21, align 8
%arrayidx21.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2, align 8
%arrayidx21.4 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 4
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4, align 8
%arrayidx21.6 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 6
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6, align 8
%arrayidx16.8 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 8
%arrayidx21.8 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 8
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8, align 8
%arrayidx21.10 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 10
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10, align 8
%arrayidx21.12 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 12
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12, align 8
%arrayidx21.14 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 14
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14, align 8
%arrayidx16.16 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 16
store i16 5, ptr %arrayidx16.16, align 2
%arrayidx21.16 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 16
%arrayidx16.17 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 17
store i16 5, ptr %arrayidx16.17, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16, align 8
%arrayidx16.18 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 0, i32 %d.051, i32 18
store i16 5, ptr %arrayidx16.18, align 2
%arrayidx21.18 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 0, i32 %d.051, i32 18
store i64 5, ptr %arrayidx21.18, align 8
%inc = add nuw nsw i32 %d.051, 1
%exitcond.not = icmp eq i32 %inc, %add
br i1 %exitcond.not, label %for.cond.cleanup6, label %for.cond8.preheader
for.cond.cleanup6: ; preds = %for.cond8.preheader
call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) getelementptr inbounds ([17 x [12 x [19 x i16]]], ptr @arr_22, i32 0, i32 4, i32 0, i32 0), i8 0, i32 %0, i1 false)
br label %for.cond8.preheader.1
for.cond8.preheader.1: ; preds = %for.cond8.preheader.1, %for.cond.cleanup6
%d.051.1 = phi i32 [ 0, %for.cond.cleanup6 ], [ %inc.1, %for.cond8.preheader.1 ]
%arrayidx16.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 0
%arrayidx21.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 0
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.1, align 8
%arrayidx21.2.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2.1, align 8
%arrayidx21.4.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 4
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4.1, align 8
%arrayidx21.6.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 6
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.1, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6.1, align 8
%arrayidx16.8.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 8
%arrayidx21.8.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 8
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8.1, align 8
%arrayidx21.10.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 10
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10.1, align 8
%arrayidx21.12.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 12
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12.1, align 8
%arrayidx21.14.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 14
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8.1, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14.1, align 8
%arrayidx16.16.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 16
store i16 5, ptr %arrayidx16.16.1, align 2
%arrayidx21.16.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 16
%arrayidx16.17.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 17
store i16 5, ptr %arrayidx16.17.1, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16.1, align 8
%arrayidx16.18.1 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 4, i32 %d.051.1, i32 18
store i16 5, ptr %arrayidx16.18.1, align 2
%arrayidx21.18.1 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 4, i32 %d.051.1, i32 18
store i64 5, ptr %arrayidx21.18.1, align 8
%inc.1 = add nuw nsw i32 %d.051.1, 1
%exitcond.not.1 = icmp eq i32 %inc.1, %add
br i1 %exitcond.not.1, label %for.cond.cleanup6.1, label %for.cond8.preheader.1
for.cond.cleanup6.1: ; preds = %for.cond8.preheader.1
call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) getelementptr inbounds ([17 x [12 x [19 x i16]]], ptr @arr_22, i32 0, i32 8, i32 0, i32 0), i8 0, i32 %0, i1 false)
br label %for.cond8.preheader.2
for.cond8.preheader.2: ; preds = %for.cond8.preheader.2, %for.cond.cleanup6.1
%d.051.2 = phi i32 [ 0, %for.cond.cleanup6.1 ], [ %inc.2, %for.cond8.preheader.2 ]
%arrayidx16.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 0
%arrayidx21.254 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 0
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.254, align 8
%arrayidx21.2.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2.2, align 8
%arrayidx21.4.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 4
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4.2, align 8
%arrayidx21.6.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 6
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.2, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6.2, align 8
%arrayidx16.8.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 8
%arrayidx21.8.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 8
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8.2, align 8
%arrayidx21.10.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 10
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10.2, align 8
%arrayidx21.12.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 12
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12.2, align 8
%arrayidx21.14.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 14
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8.2, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14.2, align 8
%arrayidx16.16.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 16
store i16 5, ptr %arrayidx16.16.2, align 2
%arrayidx21.16.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 16
%arrayidx16.17.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 17
store i16 5, ptr %arrayidx16.17.2, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16.2, align 8
%arrayidx16.18.2 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 8, i32 %d.051.2, i32 18
store i16 5, ptr %arrayidx16.18.2, align 2
%arrayidx21.18.2 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 8, i32 %d.051.2, i32 18
store i64 5, ptr %arrayidx21.18.2, align 8
%inc.2 = add nuw nsw i32 %d.051.2, 1
%exitcond.not.2 = icmp eq i32 %inc.2, %add
br i1 %exitcond.not.2, label %for.cond.cleanup6.2, label %for.cond8.preheader.2
for.cond.cleanup6.2: ; preds = %for.cond8.preheader.2
call void @llvm.memset.p0.i32(ptr noundef nonnull align 2 dereferenceable(1) getelementptr inbounds ([17 x [12 x [19 x i16]]], ptr @arr_22, i32 0, i32 12, i32 0, i32 0), i8 0, i32 %0, i1 false)
br label %for.cond8.preheader.3
for.cond8.preheader.3: ; preds = %for.cond8.preheader.3, %for.cond.cleanup6.2
%d.051.3 = phi i32 [ 0, %for.cond.cleanup6.2 ], [ %inc.3, %for.cond8.preheader.3 ]
%arrayidx16.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 0
%arrayidx21.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 0
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.3, align 8
%arrayidx21.2.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.2.3, align 8
%arrayidx21.4.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 4
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.4.3, align 8
%arrayidx21.6.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 6
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.3, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.6.3, align 8
%arrayidx16.8.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 8
%arrayidx21.8.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 8
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.8.3, align 8
%arrayidx21.10.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 10
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.10.3, align 8
%arrayidx21.12.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 12
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.12.3, align 8
%arrayidx21.14.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 14
store <8 x i16> <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>, ptr %arrayidx16.8.3, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.14.3, align 8
%arrayidx16.16.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 16
store i16 5, ptr %arrayidx16.16.3, align 2
%arrayidx21.16.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 16
%arrayidx16.17.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 17
store i16 5, ptr %arrayidx16.17.3, align 2
store <2 x i64> <i64 5, i64 5>, ptr %arrayidx21.16.3, align 8
%arrayidx16.18.3 = getelementptr inbounds [17 x [12 x [19 x i16]]], ptr @arr_21, i32 0, i32 12, i32 %d.051.3, i32 18
store i16 5, ptr %arrayidx16.18.3, align 2
%arrayidx21.18.3 = getelementptr inbounds [17 x [12 x [19 x i64]]], ptr @arr_20, i32 0, i32 12, i32 %d.051.3, i32 18
store i64 5, ptr %arrayidx21.18.3, align 8
%inc.3 = add nuw nsw i32 %d.051.3, 1
%exitcond.not.3 = icmp eq i32 %inc.3, %add
br i1 %exitcond.not.3, label %for.cond.cleanup6.3, label %for.cond8.preheader.3
for.cond.cleanup6.3: ; preds = %for.cond8.preheader.3
ret i32 undef
}
; Attribute groups. A function picks one up by naming the group (`#0`, `#1`)
; on its definition.
;
; Group #0 pairs `noinline` with `optnone`: `optnone` asks most optimization
; passes to skip the function, and the IR verifier requires `noinline` to be
; present alongside it.
attributes #0 = { noinline optnone }
; Group #1 carries `optsize`, which asks optimization and code generation to
; favour small code size for the function.
attributes #1 = { optsize }