[MachineLICM] Hoist copies of constant physical register (#93285)
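Previously, we only checked whether the source was a virtual register, which prevented some potential hoists. Copies whose source is a constant physical register are now also considered for hoisting. We can see some improvements in the AArch64 and RISC-V tests.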
This commit is contained in:
@@ -1269,8 +1269,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
|
||||
Register DefReg = MI.getOperand(0).getReg();
|
||||
if (DefReg.isVirtual() &&
|
||||
all_of(MI.uses(),
|
||||
[](const MachineOperand &UseOp) {
|
||||
return !UseOp.isReg() || UseOp.getReg().isVirtual();
|
||||
[this](const MachineOperand &UseOp) {
|
||||
return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
|
||||
MRI->isConstantPhysReg(UseOp.getReg());
|
||||
}) &&
|
||||
IsLoopInvariantInst(MI, CurLoop) &&
|
||||
any_of(MRI->use_nodbg_instructions(DefReg),
|
||||
|
||||
@@ -55,15 +55,15 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
|
||||
define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
|
||||
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, x0
|
||||
; CHECK-NEXT: .LBB3_1: // %atomicrmw.start
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldaxr x0, [x8]
|
||||
; CHECK-NEXT: cmp x0, x1
|
||||
; CHECK-NEXT: csinc x9, xzr, x0, hs
|
||||
; CHECK-NEXT: stlxr w10, x9, [x8]
|
||||
; CHECK-NEXT: ldaxr x8, [x0]
|
||||
; CHECK-NEXT: cmp x8, x1
|
||||
; CHECK-NEXT: csinc x9, xzr, x8, hs
|
||||
; CHECK-NEXT: stlxr w10, x9, [x0]
|
||||
; CHECK-NEXT: cbnz w10, .LBB3_1
|
||||
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
|
||||
; CHECK-NEXT: mov x0, x8
|
||||
; CHECK-NEXT: ret
|
||||
%result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
|
||||
ret i64 %result
|
||||
|
||||
@@ -8,57 +8,57 @@ declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x
|
||||
define fastcc i8 @allocno_reload_assign() {
|
||||
; CHECK-LABEL: allocno_reload_assign:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.b, #0 // =0x0
|
||||
; CHECK-NEXT: mov z16.d, #0 // =0x0
|
||||
; CHECK-NEXT: fmov d0, xzr
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: ptrue p1.b
|
||||
; CHECK-NEXT: mov z16.d, #0 // =0x0
|
||||
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
|
||||
; CHECK-NEXT: uzp1 p0.s, p0.s, p0.s
|
||||
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
|
||||
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
|
||||
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: mov z0.b, #0 // =0x0
|
||||
; CHECK-NEXT: sbfx x8, x8, #0, #1
|
||||
; CHECK-NEXT: uunpklo z1.h, z0.b
|
||||
; CHECK-NEXT: uunpkhi z0.h, z0.b
|
||||
; CHECK-NEXT: whilelo p1.b, xzr, x8
|
||||
; CHECK-NEXT: not p0.b, p0/z, p1.b
|
||||
; CHECK-NEXT: uunpklo z2.s, z1.h
|
||||
; CHECK-NEXT: uunpkhi z3.s, z1.h
|
||||
; CHECK-NEXT: uunpklo z5.s, z0.h
|
||||
; CHECK-NEXT: uunpkhi z7.s, z0.h
|
||||
; CHECK-NEXT: punpklo p1.h, p0.b
|
||||
; CHECK-NEXT: punpkhi p0.h, p0.b
|
||||
; CHECK-NEXT: punpklo p2.h, p1.b
|
||||
; CHECK-NEXT: uunpklo z0.d, z2.s
|
||||
; CHECK-NEXT: uunpkhi z1.d, z2.s
|
||||
; CHECK-NEXT: punpkhi p3.h, p1.b
|
||||
; CHECK-NEXT: uunpklo z2.d, z3.s
|
||||
; CHECK-NEXT: uunpkhi z3.d, z3.s
|
||||
; CHECK-NEXT: punpklo p5.h, p0.b
|
||||
; CHECK-NEXT: uunpklo z4.d, z5.s
|
||||
; CHECK-NEXT: uunpkhi z5.d, z5.s
|
||||
; CHECK-NEXT: punpkhi p7.h, p0.b
|
||||
; CHECK-NEXT: uunpklo z6.d, z7.s
|
||||
; CHECK-NEXT: uunpkhi z7.d, z7.s
|
||||
; CHECK-NEXT: punpklo p0.h, p2.b
|
||||
; CHECK-NEXT: punpkhi p1.h, p2.b
|
||||
; CHECK-NEXT: punpklo p2.h, p3.b
|
||||
; CHECK-NEXT: punpkhi p3.h, p3.b
|
||||
; CHECK-NEXT: punpklo p4.h, p5.b
|
||||
; CHECK-NEXT: punpkhi p5.h, p5.b
|
||||
; CHECK-NEXT: punpklo p6.h, p7.b
|
||||
; CHECK-NEXT: punpkhi p7.h, p7.b
|
||||
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: fmov d17, xzr
|
||||
; CHECK-NEXT: cmpeq p2.d, p0/z, z17.d, #0
|
||||
; CHECK-NEXT: uzp1 p2.s, p2.s, p0.s
|
||||
; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h
|
||||
; CHECK-NEXT: uzp1 p2.b, p2.b, p0.b
|
||||
; CHECK-NEXT: mov z17.b, p2/z, #1 // =0x1
|
||||
; CHECK-NEXT: fmov w8, s17
|
||||
; CHECK-NEXT: sbfx x8, x8, #0, #1
|
||||
; CHECK-NEXT: whilelo p2.b, xzr, x8
|
||||
; CHECK-NEXT: not p2.b, p1/z, p2.b
|
||||
; CHECK-NEXT: punpklo p3.h, p2.b
|
||||
; CHECK-NEXT: punpkhi p2.h, p2.b
|
||||
; CHECK-NEXT: punpklo p4.h, p3.b
|
||||
; CHECK-NEXT: punpkhi p3.h, p3.b
|
||||
; CHECK-NEXT: punpklo p5.h, p4.b
|
||||
; CHECK-NEXT: punpkhi p4.h, p4.b
|
||||
; CHECK-NEXT: st1b { z0.d }, p5, [z16.d]
|
||||
; CHECK-NEXT: st1b { z1.d }, p4, [z16.d]
|
||||
; CHECK-NEXT: punpklo p4.h, p3.b
|
||||
; CHECK-NEXT: punpkhi p3.h, p3.b
|
||||
; CHECK-NEXT: st1b { z2.d }, p4, [z16.d]
|
||||
; CHECK-NEXT: st1b { z0.d }, p0, [z16.d]
|
||||
; CHECK-NEXT: st1b { z1.d }, p1, [z16.d]
|
||||
; CHECK-NEXT: st1b { z2.d }, p2, [z16.d]
|
||||
; CHECK-NEXT: st1b { z3.d }, p3, [z16.d]
|
||||
; CHECK-NEXT: punpklo p3.h, p2.b
|
||||
; CHECK-NEXT: punpkhi p2.h, p2.b
|
||||
; CHECK-NEXT: punpklo p4.h, p3.b
|
||||
; CHECK-NEXT: punpkhi p3.h, p3.b
|
||||
; CHECK-NEXT: st1b { z4.d }, p4, [z16.d]
|
||||
; CHECK-NEXT: st1b { z5.d }, p3, [z16.d]
|
||||
; CHECK-NEXT: punpklo p3.h, p2.b
|
||||
; CHECK-NEXT: punpkhi p2.h, p2.b
|
||||
; CHECK-NEXT: st1b { z6.d }, p3, [z16.d]
|
||||
; CHECK-NEXT: st1b { z7.d }, p2, [z16.d]
|
||||
; CHECK-NEXT: st1b { z5.d }, p5, [z16.d]
|
||||
; CHECK-NEXT: st1b { z6.d }, p6, [z16.d]
|
||||
; CHECK-NEXT: st1b { z7.d }, p7, [z16.d]
|
||||
; CHECK-NEXT: b .LBB0_1
|
||||
br label %1
|
||||
|
||||
|
||||
@@ -9,20 +9,20 @@ target triple = "arm64-apple-macosx13.5.0"
|
||||
define i32 @nsis_BZ2_bzDecompress(ptr %pos.i, i1 %cmp661.not3117.i, i1 %exitcond.not.i) {
|
||||
; CHECK-LABEL: nsis_BZ2_bzDecompress:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: b .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_1: // %while.end671.i
|
||||
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: strb w8, [x0]
|
||||
; CHECK-NEXT: strb w9, [x0]
|
||||
; CHECK-NEXT: tbnz w2, #0, .LBB0_4
|
||||
; CHECK-NEXT: .LBB0_2: // %for.body653.i
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrb w8, [x0]
|
||||
; CHECK-NEXT: ldrb w9, [x0]
|
||||
; CHECK-NEXT: tbnz w1, #0, .LBB0_1
|
||||
; CHECK-NEXT: // %bb.3: // %while.body663.i
|
||||
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: mov x9, xzr
|
||||
; CHECK-NEXT: ldrb w9, [x9]
|
||||
; CHECK-NEXT: strb wzr, [x0, x9]
|
||||
; CHECK-NEXT: ldrb w10, [x8]
|
||||
; CHECK-NEXT: strb wzr, [x0, x10]
|
||||
; CHECK-NEXT: b .LBB0_1
|
||||
; CHECK-NEXT: .LBB0_4: // %for.end677.i
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
|
||||
@@ -8,36 +8,39 @@
|
||||
define dso_local void @run_test() local_unnamed_addr uwtable {
|
||||
; CHECK-LABEL: run_test:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: sub sp, sp, #192
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 192
|
||||
; CHECK-NEXT: sub sp, sp, #208
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 208
|
||||
; CHECK-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: str x23, [sp, #160] // 8-byte Folded Spill
|
||||
; CHECK-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_offset w19, -8
|
||||
; CHECK-NEXT: .cfi_offset w20, -16
|
||||
; CHECK-NEXT: .cfi_offset w21, -24
|
||||
; CHECK-NEXT: .cfi_offset w22, -32
|
||||
; CHECK-NEXT: .cfi_offset b8, -40
|
||||
; CHECK-NEXT: .cfi_offset b9, -48
|
||||
; CHECK-NEXT: .cfi_offset b10, -56
|
||||
; CHECK-NEXT: .cfi_offset b11, -64
|
||||
; CHECK-NEXT: .cfi_offset b12, -72
|
||||
; CHECK-NEXT: .cfi_offset b13, -80
|
||||
; CHECK-NEXT: .cfi_offset b14, -88
|
||||
; CHECK-NEXT: .cfi_offset b15, -96
|
||||
; CHECK-NEXT: .cfi_offset w23, -48
|
||||
; CHECK-NEXT: .cfi_offset b8, -56
|
||||
; CHECK-NEXT: .cfi_offset b9, -64
|
||||
; CHECK-NEXT: .cfi_offset b10, -72
|
||||
; CHECK-NEXT: .cfi_offset b11, -80
|
||||
; CHECK-NEXT: .cfi_offset b12, -88
|
||||
; CHECK-NEXT: .cfi_offset b13, -96
|
||||
; CHECK-NEXT: .cfi_offset b14, -104
|
||||
; CHECK-NEXT: .cfi_offset b15, -112
|
||||
; CHECK-NEXT: movi v2.2d, #0000000000000000
|
||||
; CHECK-NEXT: // implicit-def: $q1
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: mov x9, xzr
|
||||
; CHECK-NEXT: adrp x10, B+48
|
||||
; CHECK-NEXT: add x10, x10, :lo12:B+48
|
||||
; CHECK-NEXT: adrp x11, A
|
||||
; CHECK-NEXT: add x11, x11, :lo12:A
|
||||
; CHECK-NEXT: adrp x9, B+48
|
||||
; CHECK-NEXT: add x9, x9, :lo12:B+48
|
||||
; CHECK-NEXT: adrp x10, A
|
||||
; CHECK-NEXT: add x10, x10, :lo12:A
|
||||
; CHECK-NEXT: mov x11, xzr
|
||||
; CHECK-NEXT: // kill: killed $q1
|
||||
; CHECK-NEXT: // implicit-def: $q1
|
||||
; CHECK-NEXT: mov x12, xzr
|
||||
; CHECK-NEXT: // implicit-def: $q0
|
||||
; CHECK-NEXT: // implicit-def: $q3
|
||||
; CHECK-NEXT: // implicit-def: $q4
|
||||
@@ -69,103 +72,102 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
|
||||
; CHECK-NEXT: // kill: killed $q1
|
||||
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: ldr q14, [x8]
|
||||
; CHECK-NEXT: mov x12, xzr
|
||||
; CHECK-NEXT: ldr x14, [x12]
|
||||
; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill
|
||||
; CHECK-NEXT: add x19, x11, x8
|
||||
; CHECK-NEXT: fmov x15, d14
|
||||
; CHECK-NEXT: mov x16, v14.d[1]
|
||||
; CHECK-NEXT: ldr q15, [x12]
|
||||
; CHECK-NEXT: ldr q14, [x10], #64
|
||||
; CHECK-NEXT: ldr q15, [x8]
|
||||
; CHECK-NEXT: ldr x15, [x8]
|
||||
; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: add x20, x10, x11
|
||||
; CHECK-NEXT: mov v8.16b, v28.16b
|
||||
; CHECK-NEXT: fmov x13, d15
|
||||
; CHECK-NEXT: mov x18, v15.d[1]
|
||||
; CHECK-NEXT: fmov x2, d15
|
||||
; CHECK-NEXT: mov x17, v15.d[1]
|
||||
; CHECK-NEXT: ldr q14, [x8]
|
||||
; CHECK-NEXT: mov v28.16b, v24.16b
|
||||
; CHECK-NEXT: mul x17, x15, x14
|
||||
; CHECK-NEXT: mov x12, v14.d[1]
|
||||
; CHECK-NEXT: fmov x4, d14
|
||||
; CHECK-NEXT: mov v24.16b, v20.16b
|
||||
; CHECK-NEXT: mov v20.16b, v17.16b
|
||||
; CHECK-NEXT: fmov x13, d14
|
||||
; CHECK-NEXT: mov x16, v14.d[1]
|
||||
; CHECK-NEXT: mov v17.16b, v5.16b
|
||||
; CHECK-NEXT: mul x1, x16, x14
|
||||
; CHECK-NEXT: mul x3, x2, x15
|
||||
; CHECK-NEXT: ldr q14, [x9], #64
|
||||
; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x5, [x8]
|
||||
; CHECK-NEXT: ldr x19, [x19, #128]
|
||||
; CHECK-NEXT: ldr x6, [x8]
|
||||
; CHECK-NEXT: ldr x20, [x20, #128]
|
||||
; CHECK-NEXT: mul x1, x17, x15
|
||||
; CHECK-NEXT: mov x14, v14.d[1]
|
||||
; CHECK-NEXT: fmov x5, d14
|
||||
; CHECK-NEXT: mov v29.16b, v21.16b
|
||||
; CHECK-NEXT: mov v21.16b, v0.16b
|
||||
; CHECK-NEXT: mul x0, x13, x14
|
||||
; CHECK-NEXT: mov v25.16b, v6.16b
|
||||
; CHECK-NEXT: mul x18, x13, x15
|
||||
; CHECK-NEXT: mov v6.16b, v2.16b
|
||||
; CHECK-NEXT: fmov d15, x17
|
||||
; CHECK-NEXT: mov v26.16b, v22.16b
|
||||
; CHECK-NEXT: fmov d15, x3
|
||||
; CHECK-NEXT: mov v22.16b, v18.16b
|
||||
; CHECK-NEXT: mul x2, x18, x14
|
||||
; CHECK-NEXT: mov v18.16b, v7.16b
|
||||
; CHECK-NEXT: mul x0, x16, x15
|
||||
; CHECK-NEXT: mov v7.16b, v3.16b
|
||||
; CHECK-NEXT: mov v16.16b, v4.16b
|
||||
; CHECK-NEXT: add x8, x8, #8
|
||||
; CHECK-NEXT: add x9, x9, #1
|
||||
; CHECK-NEXT: add x11, x11, #8
|
||||
; CHECK-NEXT: add x12, x12, #1
|
||||
; CHECK-NEXT: mov v15.d[1], x1
|
||||
; CHECK-NEXT: mul x3, x12, x14
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: fmov d14, x0
|
||||
; CHECK-NEXT: mul x14, x4, x14
|
||||
; CHECK-NEXT: mul x4, x14, x15
|
||||
; CHECK-NEXT: cmp x11, #64
|
||||
; CHECK-NEXT: fmov d14, x18
|
||||
; CHECK-NEXT: mul x15, x5, x15
|
||||
; CHECK-NEXT: add v5.2d, v5.2d, v15.2d
|
||||
; CHECK-NEXT: mul x20, x15, x5
|
||||
; CHECK-NEXT: mov v14.d[1], x2
|
||||
; CHECK-NEXT: mul x15, x15, x19
|
||||
; CHECK-NEXT: fmov d0, x14
|
||||
; CHECK-NEXT: mul x21, x2, x6
|
||||
; CHECK-NEXT: mov v14.d[1], x0
|
||||
; CHECK-NEXT: mul x2, x2, x20
|
||||
; CHECK-NEXT: fmov d0, x15
|
||||
; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mul x21, x13, x19
|
||||
; CHECK-NEXT: mul x22, x13, x20
|
||||
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
|
||||
; CHECK-NEXT: fmov d3, x20
|
||||
; CHECK-NEXT: mul x7, x16, x5
|
||||
; CHECK-NEXT: mov v0.d[1], x3
|
||||
; CHECK-NEXT: fmov d1, x15
|
||||
; CHECK-NEXT: mul x16, x16, x19
|
||||
; CHECK-NEXT: fmov d3, x21
|
||||
; CHECK-NEXT: mul x19, x17, x6
|
||||
; CHECK-NEXT: mov v0.d[1], x4
|
||||
; CHECK-NEXT: fmov d1, x2
|
||||
; CHECK-NEXT: mul x17, x17, x20
|
||||
; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: add v5.2d, v13.2d, v14.2d
|
||||
; CHECK-NEXT: fmov d2, x21
|
||||
; CHECK-NEXT: fmov d2, x22
|
||||
; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mul x6, x18, x5
|
||||
; CHECK-NEXT: mul x7, x16, x6
|
||||
; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload
|
||||
; CHECK-NEXT: mov v3.d[1], x7
|
||||
; CHECK-NEXT: mov v3.d[1], x19
|
||||
; CHECK-NEXT: add v13.2d, v13.2d, v0.2d
|
||||
; CHECK-NEXT: mul x18, x18, x19
|
||||
; CHECK-NEXT: mov v1.d[1], x16
|
||||
; CHECK-NEXT: mul x22, x4, x19
|
||||
; CHECK-NEXT: mul x16, x16, x20
|
||||
; CHECK-NEXT: mov v1.d[1], x17
|
||||
; CHECK-NEXT: mul x23, x5, x20
|
||||
; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov v13.16b, v5.16b
|
||||
; CHECK-NEXT: mov v5.16b, v17.16b
|
||||
; CHECK-NEXT: mov v17.16b, v20.16b
|
||||
; CHECK-NEXT: mov v20.16b, v24.16b
|
||||
; CHECK-NEXT: mul x13, x13, x5
|
||||
; CHECK-NEXT: mul x13, x13, x6
|
||||
; CHECK-NEXT: mov v24.16b, v28.16b
|
||||
; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
|
||||
; CHECK-NEXT: mov v2.d[1], x18
|
||||
; CHECK-NEXT: mov v2.d[1], x16
|
||||
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
|
||||
; CHECK-NEXT: add v27.2d, v27.2d, v3.2d
|
||||
; CHECK-NEXT: mul x17, x12, x19
|
||||
; CHECK-NEXT: mul x18, x14, x20
|
||||
; CHECK-NEXT: add v23.2d, v23.2d, v3.2d
|
||||
; CHECK-NEXT: add v19.2d, v19.2d, v3.2d
|
||||
; CHECK-NEXT: fmov d4, x22
|
||||
; CHECK-NEXT: fmov d4, x23
|
||||
; CHECK-NEXT: add v10.2d, v10.2d, v3.2d
|
||||
; CHECK-NEXT: mul x14, x4, x5
|
||||
; CHECK-NEXT: mul x15, x5, x6
|
||||
; CHECK-NEXT: fmov d0, x13
|
||||
; CHECK-NEXT: add v14.2d, v14.2d, v2.2d
|
||||
; CHECK-NEXT: add v2.2d, v6.2d, v3.2d
|
||||
; CHECK-NEXT: mul x12, x12, x5
|
||||
; CHECK-NEXT: mul x14, x14, x6
|
||||
; CHECK-NEXT: mov v3.16b, v7.16b
|
||||
; CHECK-NEXT: mov v7.16b, v18.16b
|
||||
; CHECK-NEXT: mov v4.d[1], x17
|
||||
; CHECK-NEXT: mov v4.d[1], x18
|
||||
; CHECK-NEXT: mov v18.16b, v22.16b
|
||||
; CHECK-NEXT: mov v0.d[1], x6
|
||||
; CHECK-NEXT: fmov d1, x14
|
||||
; CHECK-NEXT: mov v0.d[1], x7
|
||||
; CHECK-NEXT: fmov d1, x15
|
||||
; CHECK-NEXT: add v28.2d, v8.2d, v4.2d
|
||||
; CHECK-NEXT: mov v1.d[1], x12
|
||||
; CHECK-NEXT: mov v1.d[1], x14
|
||||
; CHECK-NEXT: add v31.2d, v31.2d, v0.2d
|
||||
; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
|
||||
; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
|
||||
@@ -192,11 +194,12 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
|
||||
; CHECK-NEXT: adrp x8, C
|
||||
; CHECK-NEXT: add x8, x8, :lo12:C
|
||||
; CHECK-NEXT: stp q11, q30, [x8, #80]
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: str q1, [x8]
|
||||
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x23, [sp, #160] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: stp q15, q14, [x8, #144]
|
||||
; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: stp q1, q13, [x8, #16]
|
||||
; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: stp q28, q12, [x8, #176]
|
||||
@@ -216,12 +219,13 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
|
||||
; CHECK-NEXT: stp q5, q4, [x8, #432]
|
||||
; CHECK-NEXT: stp q2, q3, [x8, #464]
|
||||
; CHECK-NEXT: str q0, [x8, #496]
|
||||
; CHECK-NEXT: add sp, sp, #192
|
||||
; CHECK-NEXT: add sp, sp, #208
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 0
|
||||
; CHECK-NEXT: .cfi_restore w19
|
||||
; CHECK-NEXT: .cfi_restore w20
|
||||
; CHECK-NEXT: .cfi_restore w21
|
||||
; CHECK-NEXT: .cfi_restore w22
|
||||
; CHECK-NEXT: .cfi_restore w23
|
||||
; CHECK-NEXT: .cfi_restore b8
|
||||
; CHECK-NEXT: .cfi_restore b9
|
||||
; CHECK-NEXT: .cfi_restore b10
|
||||
|
||||
@@ -142,7 +142,8 @@ attributes #0 = { nounwind }
|
||||
|
||||
; GCN: amdpal.pipelines:
|
||||
; GCN-NEXT: - .registers:
|
||||
; GCN-NEXT: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf01ca{{$}}
|
||||
; SDAG-NEXT: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf01ca{{$}}
|
||||
; GISEL-NEXT: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf01cb{{$}}
|
||||
; GCN-NEXT: '0x2e13 (COMPUTE_PGM_RSRC2)': 0x8001{{$}}
|
||||
; GCN-NEXT: .shader_functions:
|
||||
; GCN-NEXT: dynamic_stack:
|
||||
@@ -156,10 +157,10 @@ attributes #0 = { nounwind }
|
||||
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
|
||||
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
|
||||
; GISEL-NEXT: .sgpr_count: 0x27{{$}}
|
||||
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
|
||||
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
|
||||
; GISEL-NEXT: .vgpr_count: 0x5{{$}}
|
||||
; GCN-NEXT: multiple_stack:
|
||||
; GCN-NEXT: .backend_stack_size: 0x24{{$}}
|
||||
; GCN-NEXT: .lds_size: 0{{$}}
|
||||
|
||||
@@ -245,6 +245,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI2]], %subreg.sub0, killed [[PHI3]], %subreg.sub1
|
||||
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
|
||||
; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: bb.3:
|
||||
; SI-NEXT: successors: %bb.4(0x80000000)
|
||||
@@ -261,8 +262,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
|
||||
; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]]
|
||||
; SI-NEXT: $vgpr0 = COPY killed [[PHI5]]
|
||||
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
|
||||
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
@@ -282,6 +282,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1
|
||||
; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
|
||||
; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: bb.7:
|
||||
; SI-NEXT: successors: %bb.8(0x80000000)
|
||||
@@ -298,8 +299,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
|
||||
; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000)
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]]
|
||||
; SI-NEXT: $vgpr0 = COPY killed [[PHI7]]
|
||||
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
|
||||
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
@@ -367,6 +367,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, killed [[PHI2]], %subreg.sub1
|
||||
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
|
||||
; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: bb.3:
|
||||
; SI-NEXT: successors: %bb.4(0x80000000)
|
||||
@@ -382,8 +383,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
|
||||
; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]]
|
||||
; SI-NEXT: $vgpr0 = COPY [[COPY4]]
|
||||
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
|
||||
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
@@ -403,6 +403,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1
|
||||
; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
|
||||
; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: bb.7:
|
||||
; SI-NEXT: successors: %bb.8(0x80000000)
|
||||
@@ -418,8 +419,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
|
||||
; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000)
|
||||
; SI-NEXT: {{ $}}
|
||||
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
|
||||
; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]]
|
||||
; SI-NEXT: $vgpr0 = COPY [[COPY4]]
|
||||
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
|
||||
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
||||
|
||||
@@ -60,13 +60,13 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) {
|
||||
; CHECK-NEXT: breq .LBB3_3
|
||||
; CHECK-NEXT: ; %bb.1: ; %shift.loop.preheader
|
||||
; CHECK-NEXT: mov r27, r1
|
||||
; CHECK-NEXT: mov r16, r1
|
||||
; CHECK-NEXT: mov r17, r1
|
||||
; CHECK-NEXT: mov r16, r27
|
||||
; CHECK-NEXT: mov r17, r27
|
||||
; CHECK-NEXT: .LBB3_2: ; %shift.loop
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: mov r31, r21
|
||||
; CHECK-NEXT: lsl r31
|
||||
; CHECK-NEXT: mov r26, r1
|
||||
; CHECK-NEXT: mov r26, r27
|
||||
; CHECK-NEXT: rol r26
|
||||
; CHECK-NEXT: lsl r22
|
||||
; CHECK-NEXT: rol r23
|
||||
|
||||
41
llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
Normal file
41
llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
Normal file
@@ -0,0 +1,41 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O3 < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
|
||||
|
||||
declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
|
||||
|
||||
define i32 @test(ptr %a, i64 %n) {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: li a3, 0
|
||||
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
|
||||
; CHECK-NEXT: vmv.s.x v8, zero
|
||||
; CHECK-NEXT: .LBB0_1: # %loop
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vl1re32.v v9, (a0)
|
||||
; CHECK-NEXT: mv a2, a3
|
||||
; CHECK-NEXT: vredsum.vs v9, v9, v8
|
||||
; CHECK-NEXT: vmv.x.s a3, v9
|
||||
; CHECK-NEXT: addw a3, a3, a3
|
||||
; CHECK-NEXT: addi a1, a1, -1
|
||||
; CHECK-NEXT: addi a0, a0, 8
|
||||
; CHECK-NEXT: bnez a1, .LBB0_1
|
||||
; CHECK-NEXT: # %bb.2: # %exit
|
||||
; CHECK-NEXT: mv a0, a2
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%indvar = phi i64 [ 0, %entry ], [ %indvar.inc, %loop ]
|
||||
%sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop ]
|
||||
%idx = getelementptr inbounds ptr, ptr %a, i64 %indvar
|
||||
%data = load <vscale x 2 x i32>, ptr %idx
|
||||
%reduce = tail call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %data)
|
||||
%sum.inc = add i32 %reduce, %reduce
|
||||
%indvar.inc = add i64 %indvar, 1
|
||||
%cmp = icmp eq i64 %indvar.inc, %n
|
||||
br i1 %cmp, label %exit, label %loop
|
||||
|
||||
exit:
|
||||
ret i32 %sum
|
||||
}
|
||||
@@ -15,27 +15,30 @@ define void @foo(<vscale x 8 x i8> %0) {
|
||||
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_offset ra, -8
|
||||
; CHECK-NEXT: .cfi_offset s0, -16
|
||||
; CHECK-NEXT: .cfi_offset s1, -24
|
||||
; CHECK-NEXT: .cfi_offset s2, -32
|
||||
; CHECK-NEXT: li s0, 0
|
||||
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
|
||||
; CHECK-NEXT: vmv.v.i v9, 0
|
||||
; CHECK-NEXT: vsetivli zero, 0, e8, m1, tu, ma
|
||||
; CHECK-NEXT: vslideup.vi v9, v10, 0
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
|
||||
; CHECK-NEXT: vmv.x.s s0, v9
|
||||
; CHECK-NEXT: vmv.x.s s1, v9
|
||||
; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma
|
||||
; CHECK-NEXT: vslideup.vi v8, v9, 0
|
||||
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
|
||||
; CHECK-NEXT: vmv.x.s s1, v8
|
||||
; CHECK-NEXT: vmv.x.s s2, v8
|
||||
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: li a1, 0
|
||||
; CHECK-NEXT: mv a0, s0
|
||||
; CHECK-NEXT: mv a2, s1
|
||||
; CHECK-NEXT: li a3, 0
|
||||
; CHECK-NEXT: li a4, 0
|
||||
; CHECK-NEXT: li a5, 0
|
||||
; CHECK-NEXT: jalr a1
|
||||
; CHECK-NEXT: mv a0, s1
|
||||
; CHECK-NEXT: mv a1, s0
|
||||
; CHECK-NEXT: mv a2, s2
|
||||
; CHECK-NEXT: mv a3, s0
|
||||
; CHECK-NEXT: mv a4, s0
|
||||
; CHECK-NEXT: mv a5, s0
|
||||
; CHECK-NEXT: jalr s0
|
||||
; CHECK-NEXT: j .LBB0_1
|
||||
%2 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> undef, i64 0)
|
||||
%3 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> poison, i64 0)
|
||||
|
||||
@@ -11,22 +11,22 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
|
||||
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; RV32-NEXT: vmv.v.x v8, a3
|
||||
; RV32-NEXT: addi a3, a2, 1
|
||||
; RV32-NEXT: addi a4, a0, 1
|
||||
; RV32-NEXT: vmv.s.x v9, zero
|
||||
; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
|
||||
; RV32-NEXT: vslideup.vx v8, v9, a2
|
||||
; RV32-NEXT: addi a2, a0, 1
|
||||
; RV32-NEXT: .LBB0_1: # %for.body
|
||||
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV32-NEXT: th.lrb a0, a1, a0, 0
|
||||
; RV32-NEXT: vmv.s.x v9, zero
|
||||
; RV32-NEXT: vmv1r.v v10, v8
|
||||
; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
|
||||
; RV32-NEXT: vslideup.vx v10, v9, a2
|
||||
; RV32-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
|
||||
; RV32-NEXT: vmv.s.x v10, a0
|
||||
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
|
||||
; RV32-NEXT: vmseq.vi v9, v10, 0
|
||||
; RV32-NEXT: vmv1r.v v9, v8
|
||||
; RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma
|
||||
; RV32-NEXT: vmv.s.x v9, a0
|
||||
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; RV32-NEXT: vmseq.vi v9, v9, 0
|
||||
; RV32-NEXT: vmv.x.s a0, v9
|
||||
; RV32-NEXT: andi a5, a0, 255
|
||||
; RV32-NEXT: mv a0, a4
|
||||
; RV32-NEXT: bnez a5, .LBB0_1
|
||||
; RV32-NEXT: andi a3, a0, 255
|
||||
; RV32-NEXT: mv a0, a2
|
||||
; RV32-NEXT: bnez a3, .LBB0_1
|
||||
; RV32-NEXT: # %bb.2: # %if.then381
|
||||
; RV32-NEXT: li a0, 0
|
||||
; RV32-NEXT: ret
|
||||
@@ -37,23 +37,23 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
|
||||
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; RV64-NEXT: vmv.v.x v8, a3
|
||||
; RV64-NEXT: addi a3, a2, 1
|
||||
; RV64-NEXT: addi a4, a0, 1
|
||||
; RV64-NEXT: vmv.s.x v9, zero
|
||||
; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
|
||||
; RV64-NEXT: vslideup.vx v8, v9, a2
|
||||
; RV64-NEXT: addi a2, a0, 1
|
||||
; RV64-NEXT: .LBB0_1: # %for.body
|
||||
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; RV64-NEXT: sext.w a0, a0
|
||||
; RV64-NEXT: th.lrb a0, a1, a0, 0
|
||||
; RV64-NEXT: vmv.s.x v9, zero
|
||||
; RV64-NEXT: vmv1r.v v10, v8
|
||||
; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
|
||||
; RV64-NEXT: vslideup.vx v10, v9, a2
|
||||
; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
|
||||
; RV64-NEXT: vmv.s.x v10, a0
|
||||
; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
|
||||
; RV64-NEXT: vmseq.vi v9, v10, 0
|
||||
; RV64-NEXT: vmv1r.v v9, v8
|
||||
; RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma
|
||||
; RV64-NEXT: vmv.s.x v9, a0
|
||||
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
|
||||
; RV64-NEXT: vmseq.vi v9, v9, 0
|
||||
; RV64-NEXT: vmv.x.s a0, v9
|
||||
; RV64-NEXT: andi a5, a0, 255
|
||||
; RV64-NEXT: mv a0, a4
|
||||
; RV64-NEXT: bnez a5, .LBB0_1
|
||||
; RV64-NEXT: andi a3, a0, 255
|
||||
; RV64-NEXT: mv a0, a2
|
||||
; RV64-NEXT: bnez a3, .LBB0_1
|
||||
; RV64-NEXT: # %bb.2: # %if.then381
|
||||
; RV64-NEXT: li a0, 0
|
||||
; RV64-NEXT: ret
|
||||
|
||||
@@ -71,10 +71,13 @@ define void @machine_licm() {
|
||||
; CHECK-NEXT: addi sp, sp, -16
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
||||
; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_offset ra, -4
|
||||
; CHECK-NEXT: .cfi_offset s0, -8
|
||||
; CHECK-NEXT: csrr s0, vlenb
|
||||
; CHECK-NEXT: .LBB4_1: # %loop
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: mv a0, s0
|
||||
; CHECK-NEXT: call use
|
||||
; CHECK-NEXT: j .LBB4_1
|
||||
entry:
|
||||
|
||||
Reference in New Issue
Block a user