clang-p2996/llvm/test/CodeGen/RISCV/pr69586.ll
Wang Pengcheng 2023a230d1 [RISCV] Move V0 to the end of register allocation order (#82967)
According to

https://riscv-optimization-guide-riseproject-c94355ae3e6872252baa952524.gitlab.io/riscv-optimization-guide.html:

> The v0 register defined by the RISC-V vector extension is special in
> that it can be used both as a general purpose vector register and also
> as a mask register. As a preference, use registers other than v0 for
> non-mask values. Otherwise data will have to be moved out of v0 when a
> mask is required in an operation. v0 may be used when all other
> registers are in use, and using v0 would avoid spilling register state
> to memory.

In addition, using the V0 register may stall the masking pipeline and stop
chaining on some microarchitectures.

So we should avoid using V0, and the register groups containing it, as much
as possible. We achieve this by moving V0 to the end of the register
allocation (RA) order.
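As a minimal illustration of the copy this ordering tries to avoid (a
hand-written sketch, not output from this test, with a hypothetical register
assignment): if the allocator places a non-mask value in v0, a later masked
operation must first evict it, because the mask operand of a masked RVV
instruction can only live in v0.

    vsetvli   zero, a1, e32, m1, ta, ma
    vmv1r.v   v9, v0            # non-mask data happened to be allocated to v0; move it out
    vmslt.vx  v0, v10, a0       # the mask itself must be materialized in v0
    vadd.vv   v8, v8, v9, v0.t  # the masked add now reads the relocated value from v9

With V0 at the end of the allocation order, the data value would normally be
assigned to some other vector register and the vmv1r.v copy disappears.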

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
; RUN: -riscv-use-rematerializable-movimm=false | FileCheck %s --check-prefix=NOREMAT
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
; RUN: --riscv-use-rematerializable-movimm=true | FileCheck %s --check-prefix=REMAT
define void @test(ptr %0, ptr %1, i64 %2) {
; NOREMAT-LABEL: test:
; NOREMAT: # %bb.0:
; NOREMAT-NEXT: addi sp, sp, -400
; NOREMAT-NEXT: .cfi_def_cfa_offset 400
; NOREMAT-NEXT: sd ra, 392(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s0, 384(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s1, 376(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s2, 368(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s3, 360(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s4, 352(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s5, 344(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s6, 336(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s7, 328(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s8, 320(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s9, 312(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s10, 304(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: sd s11, 296(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: .cfi_offset ra, -8
; NOREMAT-NEXT: .cfi_offset s0, -16
; NOREMAT-NEXT: .cfi_offset s1, -24
; NOREMAT-NEXT: .cfi_offset s2, -32
; NOREMAT-NEXT: .cfi_offset s3, -40
; NOREMAT-NEXT: .cfi_offset s4, -48
; NOREMAT-NEXT: .cfi_offset s5, -56
; NOREMAT-NEXT: .cfi_offset s6, -64
; NOREMAT-NEXT: .cfi_offset s7, -72
; NOREMAT-NEXT: .cfi_offset s8, -80
; NOREMAT-NEXT: .cfi_offset s9, -88
; NOREMAT-NEXT: .cfi_offset s10, -96
; NOREMAT-NEXT: .cfi_offset s11, -104
; NOREMAT-NEXT: csrr a2, vlenb
; NOREMAT-NEXT: li a3, 6
; NOREMAT-NEXT: mul a2, a2, a3
; NOREMAT-NEXT: sub sp, sp, a2
; NOREMAT-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x03, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 400 + 6 * vlenb
; NOREMAT-NEXT: li a2, 32
; NOREMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; NOREMAT-NEXT: vle32.v v8, (a0)
; NOREMAT-NEXT: addi a2, a0, 512
; NOREMAT-NEXT: vle32.v v10, (a2)
; NOREMAT-NEXT: addi a2, a0, 1024
; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10
; NOREMAT-NEXT: vle32.v v8, (a2)
; NOREMAT-NEXT: addi a2, a0, 1536
; NOREMAT-NEXT: vle32.v v14, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a2)
; NOREMAT-NEXT: li a2, 1
; NOREMAT-NEXT: slli a2, a2, 11
; NOREMAT-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a2)
; NOREMAT-NEXT: li a5, 5
; NOREMAT-NEXT: slli a2, a5, 9
; NOREMAT-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v14, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a2)
; NOREMAT-NEXT: li a2, 3
; NOREMAT-NEXT: slli a3, a2, 10
; NOREMAT-NEXT: sd a3, 256(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a3)
; NOREMAT-NEXT: li a4, 7
; NOREMAT-NEXT: slli a3, a4, 9
; NOREMAT-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v14, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a3)
; NOREMAT-NEXT: lui a3, 1
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a3)
; NOREMAT-NEXT: li a3, 9
; NOREMAT-NEXT: slli a6, a3, 9
; NOREMAT-NEXT: sd a6, 240(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v14, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a6)
; NOREMAT-NEXT: slli a6, a5, 10
; NOREMAT-NEXT: sd a6, 232(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v12, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a6)
; NOREMAT-NEXT: li s8, 11
; NOREMAT-NEXT: slli a6, s8, 9
; NOREMAT-NEXT: sd a6, 224(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v14, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a6)
; NOREMAT-NEXT: slli a2, a2, 11
; NOREMAT-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a2)
; NOREMAT-NEXT: li s2, 13
; NOREMAT-NEXT: slli a2, s2, 9
; NOREMAT-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v14, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a2)
; NOREMAT-NEXT: slli a2, a4, 10
; NOREMAT-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a2)
; NOREMAT-NEXT: li a2, 15
; NOREMAT-NEXT: slli a6, a2, 9
; NOREMAT-NEXT: sd a6, 192(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v26, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v16, (a6)
; NOREMAT-NEXT: lui a6, 2
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v28, (a6)
; NOREMAT-NEXT: vle32.v v10, (a6)
; NOREMAT-NEXT: li a6, 17
; NOREMAT-NEXT: slli a6, a6, 9
; NOREMAT-NEXT: sd a6, 184(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: li t0, 17
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v30, (a6)
; NOREMAT-NEXT: vle32.v v18, (a6)
; NOREMAT-NEXT: slli a6, a3, 10
; NOREMAT-NEXT: sd a6, 176(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v6, (a6)
; NOREMAT-NEXT: vle32.v v20, (a6)
; NOREMAT-NEXT: li a6, 19
; NOREMAT-NEXT: slli a6, a6, 9
; NOREMAT-NEXT: sd a6, 168(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: li a7, 19
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v4, (a6)
; NOREMAT-NEXT: vle32.v v22, (a6)
; NOREMAT-NEXT: slli a5, a5, 11
; NOREMAT-NEXT: sd a5, 160(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v2, (a5)
; NOREMAT-NEXT: vle32.v v12, (a5)
; NOREMAT-NEXT: li s10, 21
; NOREMAT-NEXT: slli a5, s10, 9
; NOREMAT-NEXT: sd a5, 152(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v24, (a5)
; NOREMAT-NEXT: vle32.v v14, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26
; NOREMAT-NEXT: slli a5, s8, 10
; NOREMAT-NEXT: sd a5, 144(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v26, (a5)
; NOREMAT-NEXT: vle32.v v8, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v28
; NOREMAT-NEXT: li s6, 23
; NOREMAT-NEXT: slli a5, s6, 9
; NOREMAT-NEXT: sd a5, 136(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v28, (a5)
; NOREMAT-NEXT: vle32.v v16, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v30
; NOREMAT-NEXT: lui a5, 3
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v30, (a5)
; NOREMAT-NEXT: vle32.v v10, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v6
; NOREMAT-NEXT: li s3, 25
; NOREMAT-NEXT: slli a5, s3, 9
; NOREMAT-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v6, (a5)
; NOREMAT-NEXT: vle32.v v18, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v4
; NOREMAT-NEXT: slli a5, s2, 10
; NOREMAT-NEXT: sd a5, 120(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v4, (a5)
; NOREMAT-NEXT: vle32.v v20, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v2
; NOREMAT-NEXT: li t5, 27
; NOREMAT-NEXT: slli a5, t5, 9
; NOREMAT-NEXT: sd a5, 112(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
; NOREMAT-NEXT: vle32.v v2, (a5)
; NOREMAT-NEXT: vle32.v v22, (a5)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v24
; NOREMAT-NEXT: slli a4, a4, 11
; NOREMAT-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v24, (a4)
; NOREMAT-NEXT: vle32.v v12, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v26
; NOREMAT-NEXT: li t2, 29
; NOREMAT-NEXT: slli a4, t2, 9
; NOREMAT-NEXT: sd a4, 96(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v26, (a4)
; NOREMAT-NEXT: vle32.v v14, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v28
; NOREMAT-NEXT: slli a4, a2, 10
; NOREMAT-NEXT: sd a4, 88(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v28, (a4)
; NOREMAT-NEXT: vle32.v v8, (a4)
; NOREMAT-NEXT: csrr a4, vlenb
; NOREMAT-NEXT: slli a4, a4, 2
; NOREMAT-NEXT: add a4, sp, a4
; NOREMAT-NEXT: addi a4, a4, 288
; NOREMAT-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v30
; NOREMAT-NEXT: li a5, 31
; NOREMAT-NEXT: slli a4, a5, 9
; NOREMAT-NEXT: sd a4, 80(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v30, (a4)
; NOREMAT-NEXT: vle32.v v16, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v6
; NOREMAT-NEXT: lui a6, 4
; NOREMAT-NEXT: add a4, a0, a6
; NOREMAT-NEXT: vle32.v v6, (a4)
; NOREMAT-NEXT: vle32.v v8, (a4)
; NOREMAT-NEXT: csrr a4, vlenb
; NOREMAT-NEXT: slli a4, a4, 1
; NOREMAT-NEXT: add a4, sp, a4
; NOREMAT-NEXT: addi a4, a4, 288
; NOREMAT-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v4
; NOREMAT-NEXT: addiw a4, a6, 512
; NOREMAT-NEXT: sd a4, 72(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v4, (a4)
; NOREMAT-NEXT: vle32.v v18, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v2
; NOREMAT-NEXT: slli a4, t0, 10
; NOREMAT-NEXT: sd a4, 64(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v2, (a4)
; NOREMAT-NEXT: vle32.v v20, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v24
; NOREMAT-NEXT: addiw a4, a6, 1536
; NOREMAT-NEXT: sd a4, 56(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
; NOREMAT-NEXT: vle32.v v0, (a4)
; NOREMAT-NEXT: vle32.v v22, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v26
; NOREMAT-NEXT: slli a3, a3, 11
; NOREMAT-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: vle32.v v8, (a3)
; NOREMAT-NEXT: addi a3, sp, 288
; NOREMAT-NEXT: vs2r.v v8, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v28
; NOREMAT-NEXT: lui s1, 5
; NOREMAT-NEXT: addiw a3, s1, -1536
; NOREMAT-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v8, (a3)
; NOREMAT-NEXT: vle32.v v24, (a3)
; NOREMAT-NEXT: csrr a3, vlenb
; NOREMAT-NEXT: slli a3, a3, 2
; NOREMAT-NEXT: add a3, sp, a3
; NOREMAT-NEXT: addi a3, a3, 288
; NOREMAT-NEXT: vl2r.v v10, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v30
; NOREMAT-NEXT: slli a3, a7, 10
; NOREMAT-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v10, (a3)
; NOREMAT-NEXT: vle32.v v14, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v6
; NOREMAT-NEXT: addiw a3, s1, -512
; NOREMAT-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v6, (a3)
; NOREMAT-NEXT: vle32.v v16, (a3)
; NOREMAT-NEXT: csrr a3, vlenb
; NOREMAT-NEXT: slli a3, a3, 1
; NOREMAT-NEXT: add a3, sp, a3
; NOREMAT-NEXT: addi a3, a3, 288
; NOREMAT-NEXT: vl2r.v v26, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v4
; NOREMAT-NEXT: add a3, a0, s1
; NOREMAT-NEXT: vle32.v v26, (a3)
; NOREMAT-NEXT: vle32.v v28, (a3)
; NOREMAT-NEXT: csrr a3, vlenb
; NOREMAT-NEXT: slli a3, a3, 2
; NOREMAT-NEXT: add a3, sp, a3
; NOREMAT-NEXT: addi a3, a3, 288
; NOREMAT-NEXT: vs2r.v v28, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v2
; NOREMAT-NEXT: addiw ra, s1, 512
; NOREMAT-NEXT: add a3, a0, ra
; NOREMAT-NEXT: vle32.v v28, (a3)
; NOREMAT-NEXT: vle32.v v30, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v0
; NOREMAT-NEXT: slli s11, s10, 10
; NOREMAT-NEXT: add a3, a0, s11
; NOREMAT-NEXT: vle32.v v4, (a3)
; NOREMAT-NEXT: vle32.v v18, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v12
; NOREMAT-NEXT: addiw s10, s1, 1536
; NOREMAT-NEXT: add a3, a0, s10
; NOREMAT-NEXT: vle32.v v2, (a3)
; NOREMAT-NEXT: vle32.v v20, (a3)
; NOREMAT-NEXT: addi a3, sp, 288
; NOREMAT-NEXT: vl2r.v v12, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v8
; NOREMAT-NEXT: slli s9, s8, 11
; NOREMAT-NEXT: add a3, a0, s9
; NOREMAT-NEXT: vle32.v v0, (a3)
; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v10
; NOREMAT-NEXT: lui t0, 6
; NOREMAT-NEXT: addiw s8, t0, -1536
; NOREMAT-NEXT: add a3, a0, s8
; NOREMAT-NEXT: vle32.v v8, (a3)
; NOREMAT-NEXT: vle32.v v22, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v6
; NOREMAT-NEXT: slli s7, s6, 10
; NOREMAT-NEXT: add a3, a0, s7
; NOREMAT-NEXT: vle32.v v10, (a3)
; NOREMAT-NEXT: vle32.v v14, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v26
; NOREMAT-NEXT: addiw s6, t0, -512
; NOREMAT-NEXT: add a3, a0, s6
; NOREMAT-NEXT: vle32.v v6, (a3)
; NOREMAT-NEXT: vle32.v v16, (a3)
; NOREMAT-NEXT: csrr a3, vlenb
; NOREMAT-NEXT: slli a3, a3, 2
; NOREMAT-NEXT: add a3, sp, a3
; NOREMAT-NEXT: addi a3, a3, 288
; NOREMAT-NEXT: vl2r.v v24, (a3) # Unknown-size Folded Reload
; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v28
; NOREMAT-NEXT: add a3, a0, t0
; NOREMAT-NEXT: vle32.v v24, (a3)
; NOREMAT-NEXT: vle32.v v26, (a3)
; NOREMAT-NEXT: csrr a3, vlenb
; NOREMAT-NEXT: slli a3, a3, 2
; NOREMAT-NEXT: add a3, sp, a3
; NOREMAT-NEXT: addi a3, a3, 288
; NOREMAT-NEXT: vs2r.v v26, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v4
; NOREMAT-NEXT: addiw s5, t0, 512
; NOREMAT-NEXT: add a3, a0, s5
; NOREMAT-NEXT: vle32.v v26, (a3)
; NOREMAT-NEXT: vle32.v v28, (a3)
; NOREMAT-NEXT: csrr a3, vlenb
; NOREMAT-NEXT: slli a3, a3, 1
; NOREMAT-NEXT: add a3, sp, a3
; NOREMAT-NEXT: addi a3, a3, 288
; NOREMAT-NEXT: vs2r.v v28, (a3) # Unknown-size Folded Spill
; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v2
; NOREMAT-NEXT: slli s4, s3, 10
; NOREMAT-NEXT: add a3, a0, s4
; NOREMAT-NEXT: vle32.v v28, (a3)
; NOREMAT-NEXT: vle32.v v18, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v0
; NOREMAT-NEXT: addiw s3, t0, 1536
; NOREMAT-NEXT: add a3, a0, s3
; NOREMAT-NEXT: vle32.v v30, (a3)
; NOREMAT-NEXT: vle32.v v20, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v8
; NOREMAT-NEXT: slli s2, s2, 11
; NOREMAT-NEXT: add a3, a0, s2
; NOREMAT-NEXT: vle32.v v4, (a3)
; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v10
; NOREMAT-NEXT: lui a3, 7
; NOREMAT-NEXT: addiw s0, a3, -1536
; NOREMAT-NEXT: add a4, a0, s0
; NOREMAT-NEXT: vle32.v v2, (a4)
; NOREMAT-NEXT: vle32.v v22, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v6
; NOREMAT-NEXT: slli t6, t5, 10
; NOREMAT-NEXT: add a4, a0, t6
; NOREMAT-NEXT: vle32.v v0, (a4)
; NOREMAT-NEXT: vle32.v v14, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v24
; NOREMAT-NEXT: addiw t5, a3, -512
; NOREMAT-NEXT: add a4, a0, t5
; NOREMAT-NEXT: vle32.v v6, (a4)
; NOREMAT-NEXT: vle32.v v16, (a4)
; NOREMAT-NEXT: csrr a4, vlenb
; NOREMAT-NEXT: slli a4, a4, 2
; NOREMAT-NEXT: add a4, sp, a4
; NOREMAT-NEXT: addi a4, a4, 288
; NOREMAT-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26
; NOREMAT-NEXT: add a4, a0, a3
; NOREMAT-NEXT: vle32.v v26, (a4)
; NOREMAT-NEXT: vle32.v v8, (a4)
; NOREMAT-NEXT: csrr a4, vlenb
; NOREMAT-NEXT: slli a4, a4, 1
; NOREMAT-NEXT: add a4, sp, a4
; NOREMAT-NEXT: addi a4, a4, 288
; NOREMAT-NEXT: vl2r.v v10, (a4) # Unknown-size Folded Reload
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v28
; NOREMAT-NEXT: addiw t4, a3, 512
; NOREMAT-NEXT: add a4, a0, t4
; NOREMAT-NEXT: vle32.v v10, (a4)
; NOREMAT-NEXT: vle32.v v24, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v30
; NOREMAT-NEXT: slli t3, t2, 10
; NOREMAT-NEXT: add a4, a0, t3
; NOREMAT-NEXT: vle32.v v18, (a4)
; NOREMAT-NEXT: vle32.v v28, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v4
; NOREMAT-NEXT: addiw t2, a3, 1536
; NOREMAT-NEXT: add a4, a0, t2
; NOREMAT-NEXT: vle32.v v20, (a4)
; NOREMAT-NEXT: vle32.v v30, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v2
; NOREMAT-NEXT: slli t1, a2, 11
; NOREMAT-NEXT: add a2, a0, t1
; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: vle32.v v4, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v0
; NOREMAT-NEXT: lui a2, 8
; NOREMAT-NEXT: addiw a7, a2, -1536
; NOREMAT-NEXT: add a4, a0, a7
; NOREMAT-NEXT: vle32.v v22, (a4)
; NOREMAT-NEXT: vle32.v v2, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v6
; NOREMAT-NEXT: slli a6, a5, 10
; NOREMAT-NEXT: add a4, a0, a6
; NOREMAT-NEXT: vle32.v v14, (a4)
; NOREMAT-NEXT: vle32.v v6, (a4)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v26
; NOREMAT-NEXT: addiw a5, a2, -512
; NOREMAT-NEXT: add a4, a0, a5
; NOREMAT-NEXT: vle32.v v16, (a4)
; NOREMAT-NEXT: vle32.v v26, (a4)
; NOREMAT-NEXT: add a0, a0, a2
; NOREMAT-NEXT: vle32.v v0, (a0)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10
; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v18
; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v20
; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v12
; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v22
; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v14
; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v16
; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: addi a0, a1, 1024
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: addi a0, a1, 1536
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 248(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: lui a0, 1
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: lui a0, 2
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: lui a0, 3
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: lui a0, 4
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s1, a1, s1
; NOREMAT-NEXT: vse32.v v8, (s1)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add ra, a1, ra
; NOREMAT-NEXT: vse32.v v8, (ra)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s11, a1, s11
; NOREMAT-NEXT: vse32.v v8, (s11)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s10, a1, s10
; NOREMAT-NEXT: vse32.v v8, (s10)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s9, a1, s9
; NOREMAT-NEXT: vse32.v v8, (s9)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s8, a1, s8
; NOREMAT-NEXT: vse32.v v8, (s8)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s7, a1, s7
; NOREMAT-NEXT: vse32.v v8, (s7)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s6, a1, s6
; NOREMAT-NEXT: vse32.v v8, (s6)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t0, a1, t0
; NOREMAT-NEXT: vse32.v v8, (t0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s5, a1, s5
; NOREMAT-NEXT: vse32.v v8, (s5)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s4, a1, s4
; NOREMAT-NEXT: vse32.v v8, (s4)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s3, a1, s3
; NOREMAT-NEXT: vse32.v v8, (s3)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s2, a1, s2
; NOREMAT-NEXT: vse32.v v8, (s2)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s0, a1, s0
; NOREMAT-NEXT: vse32.v v8, (s0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t6, a1, t6
; NOREMAT-NEXT: vse32.v v8, (t6)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t5, a1, t5
; NOREMAT-NEXT: vse32.v v8, (t5)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add a3, a1, a3
; NOREMAT-NEXT: vse32.v v8, (a3)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t4, a1, t4
; NOREMAT-NEXT: vse32.v v8, (t4)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t3, a1, t3
; NOREMAT-NEXT: vse32.v v8, (t3)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t2, a1, t2
; NOREMAT-NEXT: vse32.v v8, (t2)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t1, a1, t1
; NOREMAT-NEXT: vse32.v v8, (t1)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add a7, a1, a7
; NOREMAT-NEXT: vse32.v v8, (a7)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add a6, a1, a6
; NOREMAT-NEXT: vse32.v v8, (a6)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add a5, a1, a5
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a5)
; NOREMAT-NEXT: add a0, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: addiw a0, a2, 512
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: addiw a0, a2, 1024
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: addiw a0, a2, 1536
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: li a0, 17
; NOREMAT-NEXT: slli a0, a0, 11
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: lui a0, 9
; NOREMAT-NEXT: addiw a2, a0, -1536
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
; NOREMAT-NEXT: addiw a2, a0, -1024
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a2)
; NOREMAT-NEXT: addiw a2, a0, -512
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
; NOREMAT-NEXT: add a2, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a2)
; NOREMAT-NEXT: addiw a2, a0, 512
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
; NOREMAT-NEXT: addiw a2, a0, 1024
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: vse32.v v10, (a2)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: addiw a0, a0, 1536
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: li a0, 19
; NOREMAT-NEXT: slli a0, a0, 11
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: lui a0, 10
; NOREMAT-NEXT: addiw a2, a0, -1536
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
; NOREMAT-NEXT: addiw a2, a0, -1024
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a2)
; NOREMAT-NEXT: addiw a2, a0, -512
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
; NOREMAT-NEXT: add a2, a1, a0
; NOREMAT-NEXT: vse32.v v10, (a2)
; NOREMAT-NEXT: addiw a0, a0, 512
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: csrr a0, vlenb
; NOREMAT-NEXT: li a1, 6
; NOREMAT-NEXT: mul a0, a0, a1
; NOREMAT-NEXT: add sp, sp, a0
; NOREMAT-NEXT: ld ra, 392(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s1, 376(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s2, 368(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s3, 360(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s4, 352(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s5, 344(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s6, 336(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s7, 328(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s8, 320(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s9, 312(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s10, 304(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: ld s11, 296(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: addi sp, sp, 400
; NOREMAT-NEXT: ret
;
; REMAT-LABEL: test:
; REMAT: # %bb.0:
; REMAT-NEXT: addi sp, sp, -112
; REMAT-NEXT: .cfi_def_cfa_offset 112
; REMAT-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
; REMAT-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; REMAT-NEXT: .cfi_offset ra, -8
; REMAT-NEXT: .cfi_offset s0, -16
; REMAT-NEXT: .cfi_offset s1, -24
; REMAT-NEXT: .cfi_offset s2, -32
; REMAT-NEXT: .cfi_offset s3, -40
; REMAT-NEXT: .cfi_offset s4, -48
; REMAT-NEXT: .cfi_offset s5, -56
; REMAT-NEXT: .cfi_offset s6, -64
; REMAT-NEXT: .cfi_offset s7, -72
; REMAT-NEXT: .cfi_offset s8, -80
; REMAT-NEXT: .cfi_offset s9, -88
; REMAT-NEXT: .cfi_offset s10, -96
; REMAT-NEXT: .cfi_offset s11, -104
; REMAT-NEXT: li a2, 32
; REMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; REMAT-NEXT: vle32.v v8, (a0)
; REMAT-NEXT: addi a2, a0, 512
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: addi a2, a0, 1024
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v10
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: addi a2, a0, 1536
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 1
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 5
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 3
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 7
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: lui a2, 1
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 9
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 5
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 11
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 3
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 13
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 7
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 15
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: lui a2, 2
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 17
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 9
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 19
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 5
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: li a2, 21
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: li a2, 11
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 23
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v16
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: lui a2, 3
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: li a2, 25
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: li a2, 13
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: li a2, 27
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: li a2, 7
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: li a2, 29
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v26
; REMAT-NEXT: li a2, 15
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v28
; REMAT-NEXT: li a2, 31
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v30
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v6
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 512
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v4
; REMAT-NEXT: li a2, 17
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v2
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 1536
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24
; REMAT-NEXT: li a2, 9
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26
; REMAT-NEXT: lui a2, 5
; REMAT-NEXT: addiw a2, a2, -1536
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v28
; REMAT-NEXT: li a2, 19
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v30
; REMAT-NEXT: lui ra, 5
; REMAT-NEXT: addiw ra, ra, -512
; REMAT-NEXT: add a2, a0, ra
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v6
; REMAT-NEXT: lui s11, 5
; REMAT-NEXT: add a2, a0, s11
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4
; REMAT-NEXT: lui s10, 5
; REMAT-NEXT: addiw s10, s10, 512
; REMAT-NEXT: add a2, a0, s10
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v2
; REMAT-NEXT: li s9, 21
; REMAT-NEXT: slli s9, s9, 10
; REMAT-NEXT: add a2, a0, s9
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v24
; REMAT-NEXT: lui s8, 5
; REMAT-NEXT: addiw s8, s8, 1536
; REMAT-NEXT: add a2, a0, s8
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v26
; REMAT-NEXT: li s7, 11
; REMAT-NEXT: slli s7, s7, 11
; REMAT-NEXT: add a2, a0, s7
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v28
; REMAT-NEXT: lui s6, 6
; REMAT-NEXT: addiw s6, s6, -1536
; REMAT-NEXT: add a2, a0, s6
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v30
; REMAT-NEXT: li s5, 23
; REMAT-NEXT: slli s5, s5, 10
; REMAT-NEXT: add a2, a0, s5
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v6
; REMAT-NEXT: lui s4, 6
; REMAT-NEXT: addiw s4, s4, -512
; REMAT-NEXT: add a2, a0, s4
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v4
; REMAT-NEXT: lui s3, 6
; REMAT-NEXT: add a2, a0, s3
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2
; REMAT-NEXT: lui s2, 6
; REMAT-NEXT: addiw s2, s2, 512
; REMAT-NEXT: add a2, a0, s2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v24
; REMAT-NEXT: li s1, 25
; REMAT-NEXT: slli s1, s1, 10
; REMAT-NEXT: add a2, a0, s1
; REMAT-NEXT: vle32.v v0, (a2)
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v26
; REMAT-NEXT: lui s0, 6
; REMAT-NEXT: addiw s0, s0, 1536
; REMAT-NEXT: add a2, a0, s0
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v28
; REMAT-NEXT: li t6, 13
; REMAT-NEXT: slli t6, t6, 11
; REMAT-NEXT: add a2, a0, t6
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v30
; REMAT-NEXT: lui t5, 7
; REMAT-NEXT: addiw t5, t5, -1536
; REMAT-NEXT: add a2, a0, t5
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v6
; REMAT-NEXT: li t4, 27
; REMAT-NEXT: slli t4, t4, 10
; REMAT-NEXT: add a2, a0, t4
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v4
; REMAT-NEXT: lui t3, 7
; REMAT-NEXT: addiw t3, t3, -512
; REMAT-NEXT: add a2, a0, t3
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v14, v2
; REMAT-NEXT: lui t2, 7
; REMAT-NEXT: add a2, a0, t2
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0
; REMAT-NEXT: lui t1, 7
; REMAT-NEXT: addiw t1, t1, 512
; REMAT-NEXT: add a2, a0, t1
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26
; REMAT-NEXT: li t0, 29
; REMAT-NEXT: slli t0, t0, 10
; REMAT-NEXT: add a2, a0, t0
; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: vle32.v v26, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28
; REMAT-NEXT: lui a7, 7
; REMAT-NEXT: addiw a7, a7, 1536
; REMAT-NEXT: add a2, a0, a7
; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: vle32.v v28, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30
; REMAT-NEXT: li a6, 15
; REMAT-NEXT: slli a6, a6, 11
; REMAT-NEXT: add a2, a0, a6
; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: vle32.v v30, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v24, v6
; REMAT-NEXT: lui a5, 8
; REMAT-NEXT: addiw a5, a5, -1536
; REMAT-NEXT: add a2, a0, a5
; REMAT-NEXT: vle32.v v24, (a2)
; REMAT-NEXT: vle32.v v6, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v4
; REMAT-NEXT: li a4, 31
; REMAT-NEXT: slli a4, a4, 10
; REMAT-NEXT: add a2, a0, a4
; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2
; REMAT-NEXT: lui a3, 8
; REMAT-NEXT: addiw a3, a3, -512
; REMAT-NEXT: add a2, a0, a3
; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: vle32.v v2, (a2)
; REMAT-NEXT: lui a2, 8
; REMAT-NEXT: add a0, a0, a2
; REMAT-NEXT: vle32.v v0, (a0)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; REMAT-NEXT: sf.vc.vv 3, 0, v16, v18
; REMAT-NEXT: sf.vc.vv 3, 0, v26, v20
; REMAT-NEXT: sf.vc.vv 3, 0, v28, v22
; REMAT-NEXT: sf.vc.vv 3, 0, v30, v24
; REMAT-NEXT: sf.vc.vv 3, 0, v6, v10
; REMAT-NEXT: sf.vc.vv 3, 0, v4, v12
; REMAT-NEXT: sf.vc.vv 3, 0, v2, v0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: addi a0, a1, 1024
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: addi a0, a1, 1536
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 1
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 5
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 3
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 7
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 1
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 9
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 5
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 11
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 3
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 13
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 7
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 15
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 2
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 17
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 9
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 19
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 5
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 21
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 11
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 23
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 3
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 25
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 13
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 27
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 7
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 29
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 15
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 31
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 4
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 4
; REMAT-NEXT: addiw a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 17
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 4
; REMAT-NEXT: addiw a0, a0, 1536
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 9
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 5
; REMAT-NEXT: addiw a0, a0, -1536
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 19
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add ra, a1, ra
; REMAT-NEXT: vse32.v v8, (ra)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s11, a1, s11
; REMAT-NEXT: vse32.v v8, (s11)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s10, a1, s10
; REMAT-NEXT: vse32.v v8, (s10)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s9, a1, s9
; REMAT-NEXT: vse32.v v8, (s9)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s8, a1, s8
; REMAT-NEXT: vse32.v v8, (s8)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s7, a1, s7
; REMAT-NEXT: vse32.v v8, (s7)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s6, a1, s6
; REMAT-NEXT: vse32.v v8, (s6)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s5, a1, s5
; REMAT-NEXT: vse32.v v8, (s5)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s4, a1, s4
; REMAT-NEXT: vse32.v v8, (s4)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s3, a1, s3
; REMAT-NEXT: vse32.v v8, (s3)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s2, a1, s2
; REMAT-NEXT: vse32.v v8, (s2)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s1, a1, s1
; REMAT-NEXT: vse32.v v8, (s1)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s0, a1, s0
; REMAT-NEXT: vse32.v v8, (s0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t6, a1, t6
; REMAT-NEXT: vse32.v v8, (t6)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t5, a1, t5
; REMAT-NEXT: vse32.v v8, (t5)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t4, a1, t4
; REMAT-NEXT: vse32.v v8, (t4)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t3, a1, t3
; REMAT-NEXT: vse32.v v8, (t3)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t2, a1, t2
; REMAT-NEXT: vse32.v v8, (t2)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t1, a1, t1
; REMAT-NEXT: vse32.v v8, (t1)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t0, a1, t0
; REMAT-NEXT: vse32.v v8, (t0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a7, a1, a7
; REMAT-NEXT: vse32.v v8, (a7)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a6, a1, a6
; REMAT-NEXT: vse32.v v8, (a6)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a5, a1, a5
; REMAT-NEXT: vse32.v v8, (a5)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a4, a1, a4
; REMAT-NEXT: vse32.v v8, (a4)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a3, a1, a3
; REMAT-NEXT: vse32.v v8, (a3)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a2, a1, a2
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a2)
; REMAT-NEXT: lui a0, 8
; REMAT-NEXT: addiw a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 8
; REMAT-NEXT: addiw a0, a0, 1024
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 8
; REMAT-NEXT: addiw a0, a0, 1536
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 17
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: addiw a0, a0, -1536
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: addiw a0, a0, -1024
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: addiw a0, a0, -512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: addiw a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: addiw a0, a0, 1024
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 9
; REMAT-NEXT: addiw a0, a0, 1536
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 19
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: addiw a0, a0, -1536
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: addiw a0, a0, -1024
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: addiw a0, a0, -512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: addiw a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
; REMAT-NEXT: addi sp, sp, 112
; REMAT-NEXT: ret
%4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 2, i64 1)
%5 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %0, i64 %4)
%6 = getelementptr inbounds i32, ptr %0, i64 128
%7 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %6, i64 %4)
%8 = getelementptr inbounds i32, ptr %0, i64 256
%9 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %5, <vscale x 4 x i32> %7, i64 %4)
%10 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
%11 = getelementptr inbounds i32, ptr %0, i64 384
%12 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %7, <vscale x 4 x i32> %9, i64 %4)
%13 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
%14 = getelementptr inbounds i32, ptr %0, i64 512
%15 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %10, <vscale x 4 x i32> %12, i64 %4)
%16 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
%17 = getelementptr inbounds i32, ptr %0, i64 640
%18 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %13, <vscale x 4 x i32> %15, i64 %4)
%19 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
%20 = getelementptr inbounds i32, ptr %0, i64 768
%21 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %16, <vscale x 4 x i32> %18, i64 %4)
%22 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
%23 = getelementptr inbounds i32, ptr %0, i64 896
%24 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %19, <vscale x 4 x i32> %21, i64 %4)
%25 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
%26 = getelementptr inbounds i32, ptr %0, i64 1024
%27 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %22, <vscale x 4 x i32> %24, i64 %4)
%28 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
%29 = getelementptr inbounds i32, ptr %0, i64 1152
%30 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %25, <vscale x 4 x i32> %27, i64 %4)
%31 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
%32 = getelementptr inbounds i32, ptr %0, i64 1280
%33 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %28, <vscale x 4 x i32> %30, i64 %4)
%34 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
%35 = getelementptr inbounds i32, ptr %0, i64 1408
%36 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %31, <vscale x 4 x i32> %33, i64 %4)
%37 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
%38 = getelementptr inbounds i32, ptr %0, i64 1536
%39 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %34, <vscale x 4 x i32> %36, i64 %4)
%40 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
%41 = getelementptr inbounds i32, ptr %0, i64 1664
%42 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %37, <vscale x 4 x i32> %39, i64 %4)
%43 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
%44 = getelementptr inbounds i32, ptr %0, i64 1792
%45 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %40, <vscale x 4 x i32> %42, i64 %4)
%46 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
%47 = getelementptr inbounds i32, ptr %0, i64 1920
%48 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %43, <vscale x 4 x i32> %45, i64 %4)
%49 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
%50 = getelementptr inbounds i32, ptr %0, i64 2048
%51 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %46, <vscale x 4 x i32> %48, i64 %4)
%52 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
%53 = getelementptr inbounds i32, ptr %0, i64 2176
%54 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %49, <vscale x 4 x i32> %51, i64 %4)
%55 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
%56 = getelementptr inbounds i32, ptr %0, i64 2304
%57 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %52, <vscale x 4 x i32> %54, i64 %4)
%58 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
%59 = getelementptr inbounds i32, ptr %0, i64 2432
%60 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %55, <vscale x 4 x i32> %57, i64 %4)
%61 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
%62 = getelementptr inbounds i32, ptr %0, i64 2560
%63 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %58, <vscale x 4 x i32> %60, i64 %4)
%64 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
%65 = getelementptr inbounds i32, ptr %0, i64 2688
%66 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %61, <vscale x 4 x i32> %63, i64 %4)
%67 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
%68 = getelementptr inbounds i32, ptr %0, i64 2816
%69 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %64, <vscale x 4 x i32> %66, i64 %4)
%70 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
%71 = getelementptr inbounds i32, ptr %0, i64 2944
%72 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %67, <vscale x 4 x i32> %69, i64 %4)
%73 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
%74 = getelementptr inbounds i32, ptr %0, i64 3072
%75 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %70, <vscale x 4 x i32> %72, i64 %4)
%76 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
%77 = getelementptr inbounds i32, ptr %0, i64 3200
%78 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %73, <vscale x 4 x i32> %75, i64 %4)
%79 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
%80 = getelementptr inbounds i32, ptr %0, i64 3328
%81 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %76, <vscale x 4 x i32> %78, i64 %4)
%82 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
%83 = getelementptr inbounds i32, ptr %0, i64 3456
%84 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %79, <vscale x 4 x i32> %81, i64 %4)
%85 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
%86 = getelementptr inbounds i32, ptr %0, i64 3584
%87 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %82, <vscale x 4 x i32> %84, i64 %4)
%88 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
%89 = getelementptr inbounds i32, ptr %0, i64 3712
%90 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %85, <vscale x 4 x i32> %87, i64 %4)
%91 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
%92 = getelementptr inbounds i32, ptr %0, i64 3840
%93 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %88, <vscale x 4 x i32> %90, i64 %4)
%94 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
%95 = getelementptr inbounds i32, ptr %0, i64 3968
%96 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %91, <vscale x 4 x i32> %93, i64 %4)
%97 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
%98 = getelementptr inbounds i32, ptr %0, i64 4096
%99 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %94, <vscale x 4 x i32> %96, i64 %4)
%100 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
%101 = getelementptr inbounds i32, ptr %0, i64 4224
%102 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %97, <vscale x 4 x i32> %99, i64 %4)
%103 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
%104 = getelementptr inbounds i32, ptr %0, i64 4352
%105 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %100, <vscale x 4 x i32> %102, i64 %4)
%106 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
%107 = getelementptr inbounds i32, ptr %0, i64 4480
%108 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %103, <vscale x 4 x i32> %105, i64 %4)
%109 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
%110 = getelementptr inbounds i32, ptr %0, i64 4608
%111 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %106, <vscale x 4 x i32> %108, i64 %4)
%112 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
%113 = getelementptr inbounds i32, ptr %0, i64 4736
%114 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %109, <vscale x 4 x i32> %111, i64 %4)
%115 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
%116 = getelementptr inbounds i32, ptr %0, i64 4864
%117 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %112, <vscale x 4 x i32> %114, i64 %4)
%118 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
%119 = getelementptr inbounds i32, ptr %0, i64 4992
%120 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %115, <vscale x 4 x i32> %117, i64 %4)
%121 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
%122 = getelementptr inbounds i32, ptr %0, i64 5120
%123 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %118, <vscale x 4 x i32> %120, i64 %4)
%124 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
%125 = getelementptr inbounds i32, ptr %0, i64 5248
%126 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %121, <vscale x 4 x i32> %123, i64 %4)
%127 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
%128 = getelementptr inbounds i32, ptr %0, i64 5376
%129 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %124, <vscale x 4 x i32> %126, i64 %4)
%130 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
%131 = getelementptr inbounds i32, ptr %0, i64 5504
%132 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %127, <vscale x 4 x i32> %129, i64 %4)
%133 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
%134 = getelementptr inbounds i32, ptr %0, i64 5632
%135 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %130, <vscale x 4 x i32> %132, i64 %4)
%136 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
%137 = getelementptr inbounds i32, ptr %0, i64 5760
%138 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %133, <vscale x 4 x i32> %135, i64 %4)
%139 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
%140 = getelementptr inbounds i32, ptr %0, i64 5888
%141 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %136, <vscale x 4 x i32> %138, i64 %4)
%142 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
%143 = getelementptr inbounds i32, ptr %0, i64 6016
%144 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %139, <vscale x 4 x i32> %141, i64 %4)
%145 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
%146 = getelementptr inbounds i32, ptr %0, i64 6144
%147 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %142, <vscale x 4 x i32> %144, i64 %4)
%148 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
%149 = getelementptr inbounds i32, ptr %0, i64 6272
%150 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %145, <vscale x 4 x i32> %147, i64 %4)
%151 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
%152 = getelementptr inbounds i32, ptr %0, i64 6400
%153 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %148, <vscale x 4 x i32> %150, i64 %4)
%154 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
%155 = getelementptr inbounds i32, ptr %0, i64 6528
%156 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %151, <vscale x 4 x i32> %153, i64 %4)
%157 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
%158 = getelementptr inbounds i32, ptr %0, i64 6656
%159 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %154, <vscale x 4 x i32> %156, i64 %4)
%160 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
%161 = getelementptr inbounds i32, ptr %0, i64 6784
%162 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %157, <vscale x 4 x i32> %159, i64 %4)
%163 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
%164 = getelementptr inbounds i32, ptr %0, i64 6912
%165 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %160, <vscale x 4 x i32> %162, i64 %4)
%166 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
%167 = getelementptr inbounds i32, ptr %0, i64 7040
%168 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %163, <vscale x 4 x i32> %165, i64 %4)
%169 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
%170 = getelementptr inbounds i32, ptr %0, i64 7168
%171 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %166, <vscale x 4 x i32> %168, i64 %4)
%172 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
%173 = getelementptr inbounds i32, ptr %0, i64 7296
%174 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %169, <vscale x 4 x i32> %171, i64 %4)
%175 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
%176 = getelementptr inbounds i32, ptr %0, i64 7424
%177 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %172, <vscale x 4 x i32> %174, i64 %4)
%178 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
%179 = getelementptr inbounds i32, ptr %0, i64 7552
%180 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %175, <vscale x 4 x i32> %177, i64 %4)
%181 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
%182 = getelementptr inbounds i32, ptr %0, i64 7680
%183 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %178, <vscale x 4 x i32> %180, i64 %4)
%184 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
%185 = getelementptr inbounds i32, ptr %0, i64 7808
%186 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %181, <vscale x 4 x i32> %183, i64 %4)
%187 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
%188 = getelementptr inbounds i32, ptr %0, i64 7936
%189 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %184, <vscale x 4 x i32> %186, i64 %4)
%190 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
%191 = getelementptr inbounds i32, ptr %0, i64 8064
%192 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %187, <vscale x 4 x i32> %189, i64 %4)
%193 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
%194 = getelementptr inbounds i32, ptr %0, i64 8192
%195 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %194, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %190, <vscale x 4 x i32> %192, i64 %4)
tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %193, <vscale x 4 x i32> %195, i64 %4)
%196 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
%197 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
%198 = getelementptr inbounds i32, ptr %1, i64 256
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %196, ptr %198, i64 %4)
%199 = getelementptr inbounds i32, ptr %1, i64 384
%200 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %197, ptr %199, i64 %4)
%201 = getelementptr inbounds i32, ptr %1, i64 512
%202 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %200, ptr %201, i64 %4)
%203 = getelementptr inbounds i32, ptr %1, i64 640
%204 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %202, ptr %203, i64 %4)
%205 = getelementptr inbounds i32, ptr %1, i64 768
%206 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %204, ptr %205, i64 %4)
%207 = getelementptr inbounds i32, ptr %1, i64 896
%208 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %206, ptr %207, i64 %4)
%209 = getelementptr inbounds i32, ptr %1, i64 1024
%210 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %208, ptr %209, i64 %4)
%211 = getelementptr inbounds i32, ptr %1, i64 1152
%212 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %210, ptr %211, i64 %4)
%213 = getelementptr inbounds i32, ptr %1, i64 1280
%214 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %212, ptr %213, i64 %4)
%215 = getelementptr inbounds i32, ptr %1, i64 1408
%216 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %214, ptr %215, i64 %4)
%217 = getelementptr inbounds i32, ptr %1, i64 1536
%218 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %216, ptr %217, i64 %4)
%219 = getelementptr inbounds i32, ptr %1, i64 1664
%220 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %218, ptr %219, i64 %4)
%221 = getelementptr inbounds i32, ptr %1, i64 1792
%222 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %220, ptr %221, i64 %4)
%223 = getelementptr inbounds i32, ptr %1, i64 1920
%224 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %222, ptr %223, i64 %4)
%225 = getelementptr inbounds i32, ptr %1, i64 2048
%226 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %224, ptr %225, i64 %4)
%227 = getelementptr inbounds i32, ptr %1, i64 2176
%228 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %226, ptr %227, i64 %4)
%229 = getelementptr inbounds i32, ptr %1, i64 2304
%230 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %228, ptr %229, i64 %4)
%231 = getelementptr inbounds i32, ptr %1, i64 2432
%232 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %230, ptr %231, i64 %4)
%233 = getelementptr inbounds i32, ptr %1, i64 2560
%234 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %232, ptr %233, i64 %4)
%235 = getelementptr inbounds i32, ptr %1, i64 2688
%236 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %234, ptr %235, i64 %4)
%237 = getelementptr inbounds i32, ptr %1, i64 2816
%238 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %236, ptr %237, i64 %4)
%239 = getelementptr inbounds i32, ptr %1, i64 2944
%240 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %238, ptr %239, i64 %4)
%241 = getelementptr inbounds i32, ptr %1, i64 3072
%242 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %240, ptr %241, i64 %4)
%243 = getelementptr inbounds i32, ptr %1, i64 3200
%244 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %242, ptr %243, i64 %4)
%245 = getelementptr inbounds i32, ptr %1, i64 3328
%246 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %244, ptr %245, i64 %4)
%247 = getelementptr inbounds i32, ptr %1, i64 3456
%248 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %246, ptr %247, i64 %4)
%249 = getelementptr inbounds i32, ptr %1, i64 3584
%250 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %248, ptr %249, i64 %4)
%251 = getelementptr inbounds i32, ptr %1, i64 3712
%252 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %250, ptr %251, i64 %4)
%253 = getelementptr inbounds i32, ptr %1, i64 3840
%254 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %252, ptr %253, i64 %4)
%255 = getelementptr inbounds i32, ptr %1, i64 3968
%256 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %254, ptr %255, i64 %4)
%257 = getelementptr inbounds i32, ptr %1, i64 4096
%258 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %256, ptr %257, i64 %4)
%259 = getelementptr inbounds i32, ptr %1, i64 4224
%260 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %258, ptr %259, i64 %4)
%261 = getelementptr inbounds i32, ptr %1, i64 4352
%262 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %260, ptr %261, i64 %4)
%263 = getelementptr inbounds i32, ptr %1, i64 4480
%264 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %262, ptr %263, i64 %4)
%265 = getelementptr inbounds i32, ptr %1, i64 4608
%266 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %264, ptr %265, i64 %4)
%267 = getelementptr inbounds i32, ptr %1, i64 4736
%268 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %266, ptr %267, i64 %4)
%269 = getelementptr inbounds i32, ptr %1, i64 4864
%270 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %268, ptr %269, i64 %4)
%271 = getelementptr inbounds i32, ptr %1, i64 4992
%272 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %270, ptr %271, i64 %4)
%273 = getelementptr inbounds i32, ptr %1, i64 5120
%274 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %272, ptr %273, i64 %4)
%275 = getelementptr inbounds i32, ptr %1, i64 5248
%276 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %274, ptr %275, i64 %4)
%277 = getelementptr inbounds i32, ptr %1, i64 5376
%278 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %276, ptr %277, i64 %4)
%279 = getelementptr inbounds i32, ptr %1, i64 5504
%280 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %278, ptr %279, i64 %4)
%281 = getelementptr inbounds i32, ptr %1, i64 5632
%282 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %280, ptr %281, i64 %4)
%283 = getelementptr inbounds i32, ptr %1, i64 5760
%284 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %282, ptr %283, i64 %4)
%285 = getelementptr inbounds i32, ptr %1, i64 5888
%286 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %284, ptr %285, i64 %4)
%287 = getelementptr inbounds i32, ptr %1, i64 6016
%288 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %286, ptr %287, i64 %4)
%289 = getelementptr inbounds i32, ptr %1, i64 6144
%290 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %288, ptr %289, i64 %4)
%291 = getelementptr inbounds i32, ptr %1, i64 6272
%292 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %290, ptr %291, i64 %4)
%293 = getelementptr inbounds i32, ptr %1, i64 6400
%294 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %292, ptr %293, i64 %4)
%295 = getelementptr inbounds i32, ptr %1, i64 6528
%296 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %294, ptr %295, i64 %4)
%297 = getelementptr inbounds i32, ptr %1, i64 6656
%298 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %296, ptr %297, i64 %4)
%299 = getelementptr inbounds i32, ptr %1, i64 6784
%300 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %298, ptr %299, i64 %4)
%301 = getelementptr inbounds i32, ptr %1, i64 6912
%302 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %300, ptr %301, i64 %4)
%303 = getelementptr inbounds i32, ptr %1, i64 7040
%304 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %302, ptr %303, i64 %4)
%305 = getelementptr inbounds i32, ptr %1, i64 7168
%306 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %304, ptr %305, i64 %4)
%307 = getelementptr inbounds i32, ptr %1, i64 7296
%308 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %306, ptr %307, i64 %4)
%309 = getelementptr inbounds i32, ptr %1, i64 7424
%310 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %308, ptr %309, i64 %4)
%311 = getelementptr inbounds i32, ptr %1, i64 7552
%312 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %310, ptr %311, i64 %4)
%313 = getelementptr inbounds i32, ptr %1, i64 7680
%314 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %312, ptr %313, i64 %4)
%315 = getelementptr inbounds i32, ptr %1, i64 7808
%316 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %314, ptr %315, i64 %4)
%317 = getelementptr inbounds i32, ptr %1, i64 7936
%318 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %316, ptr %317, i64 %4)
%319 = getelementptr inbounds i32, ptr %1, i64 8064
%320 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %318, ptr %319, i64 %4)
%321 = getelementptr inbounds i32, ptr %1, i64 8192
%322 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %320, ptr %321, i64 %4)
%323 = getelementptr inbounds i32, ptr %1, i64 8320
%324 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %322, ptr %323, i64 %4)
%325 = getelementptr inbounds i32, ptr %1, i64 8448
%326 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %324, ptr %325, i64 %4)
%327 = getelementptr inbounds i32, ptr %1, i64 8576
%328 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %326, ptr %327, i64 %4)
%329 = getelementptr inbounds i32, ptr %1, i64 8704
%330 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %328, ptr %329, i64 %4)
%331 = getelementptr inbounds i32, ptr %1, i64 8832
%332 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %330, ptr %331, i64 %4)
%333 = getelementptr inbounds i32, ptr %1, i64 8960
%334 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %332, ptr %333, i64 %4)
%335 = getelementptr inbounds i32, ptr %1, i64 9088
%336 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %334, ptr %335, i64 %4)
%337 = getelementptr inbounds i32, ptr %1, i64 9216
%338 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %336, ptr %337, i64 %4)
%339 = getelementptr inbounds i32, ptr %1, i64 9344
%340 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %338, ptr %339, i64 %4)
%341 = getelementptr inbounds i32, ptr %1, i64 9472
%342 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %340, ptr %341, i64 %4)
%343 = getelementptr inbounds i32, ptr %1, i64 9600
%344 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %342, ptr %343, i64 %4)
%345 = getelementptr inbounds i32, ptr %1, i64 9728
%346 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %344, ptr %345, i64 %4)
%347 = getelementptr inbounds i32, ptr %1, i64 9856
%348 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %346, ptr %347, i64 %4)
%349 = getelementptr inbounds i32, ptr %1, i64 9984
%350 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %348, ptr %349, i64 %4)
%351 = getelementptr inbounds i32, ptr %1, i64 10112
%352 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %350, ptr %351, i64 %4)
%353 = getelementptr inbounds i32, ptr %1, i64 10240
%354 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %352, ptr %353, i64 %4)
%355 = getelementptr inbounds i32, ptr %1, i64 10368
%356 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %354, ptr %355, i64 %4)
%357 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
ret void
}
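; Declarations for the standard RVV (vsetvli/vle/vse) and SiFive xsfvcp (sf.vc.*) intrinsics used by @test above.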
declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)
declare void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64, i64, <vscale x 4 x i32>, <vscale x 4 x i32>, i64)
declare <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64, i64, i64, i64)
declare void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)