Files
clang-p2996/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll
Piyou Chen 675e7bd1b9 [RISCV] Support postRA vsetvl insertion pass (#70549)
This patch tries to get rid of the vsetvl implicit vl/vtype def-use chain and
improves register-allocation quality by moving the vsetvl insertion
pass to after RVV register allocation.

It enables the following follow-up optimizations:

1. unblock scheduler's constraints by removing vl/vtype def-use chain
2. Support RVV re-materialization
3. Support partial spill

This patch adds a new option `-riscv-vsetvl-after-rvv-regalloc=<1|0>` to
control this feature; it is disabled by default.
2024-05-21 14:42:55 +08:00

222 lines
8.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; Insert at index 0 of a concat of two <2 x i8> loads. The scalar lands in the
; low element of the first half, so codegen uses a tail-undisturbed (tu)
; vmv.s.x directly into v8 before widening with vslideup.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py; regenerate
; rather than hand-editing them.
define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  ; Concatenate the two halves, overwrite element 0 with %x, store back to %a.
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 0
  store <4 x i8> %ins, ptr %a
  ret void
}
; Insert at index 1: the scalar cannot be placed with a plain vmv.s.x into the
; destination, so it is materialized in a temporary (v10) and slid up to
; position 1 within the first half before the halves are concatenated.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  ; Concatenate the two halves, overwrite element 1 with %x, store back to %a.
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 1
  store <4 x i8> %ins, ptr %a
  ret void
}
; Insert at index 2: this is element 0 of the SECOND half (%b's load), so the
; scalar is merged into that half with a tail-undisturbed (tu) vmv.s.x, then
; the whole half is slid up by 2 into the result.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  ; Concatenate the two halves, overwrite element 2 with %x, store back to %a.
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 2
  store <4 x i8> %ins, ptr %a
  ret void
}
; Insert at index 3: element 1 of the second half. The scalar is slid to
; position 1 within the second half (via v10), then that half is slid up by 2
; over the first half to form the final vector.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 2
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  ; Concatenate the two halves, overwrite element 3 with %x, store back to %a.
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 3
  store <4 x i8> %ins, ptr %a
  ret void
}
; i64 variant of idx0. RV32 has no 64-bit GPR, so the i64 scalar arrives split
; across a2/a3 and is inserted with two e32 vslide1down.vx under a
; tail-undisturbed (tu) policy; RV64 can use a single vmv.s.x of a2.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v10, (a1)
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  ; Concatenate the two halves, overwrite element 0 with %x, store back to %a.
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 0
  store <4 x i64> %ins, ptr %a
  ret void
}
; i64 variant of idx1. RV32 builds the scalar into a temporary (v9) with two
; e32 vslide1down.vx, then slides it up to position 1; RV64 uses
; vmv.s.x + a single vslideup.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vle64.v v10, (a1)
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v9, v8, a2
; RV32-NEXT: vslide1down.vx v9, v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vle64.v v10, (a1)
; RV64-NEXT: vmv.s.x v9, a2
; RV64-NEXT: vslideup.vi v8, v9, 1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v8, v10, 2
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  ; Concatenate the two halves, overwrite element 1 with %x, store back to %a.
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 1
  store <4 x i64> %ins, ptr %a
  ret void
}
; i64 variant of idx2: element 0 of the second half. RV32 merges the split
; scalar into %b's half with tail-undisturbed (tu) e32 vslide1down.vx; RV64
; uses a tu vmv.s.x. The updated half is then slid up by 2.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a1)
; RV32-NEXT: vle64.v v10, (a0)
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v10, v8, 2
; RV32-NEXT: vse64.v v10, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: vle64.v v10, (a0)
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v10, v8, 2
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  ; Concatenate the two halves, overwrite element 2 with %x, store back to %a.
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 2
  store <4 x i64> %ins, ptr %a
  ret void
}
; i64 variant of idx3: element 1 of the second half. RV32 builds the scalar in
; v9 with two e32 vslide1down.vx and slides it to position 1; RV64 uses
; vmv.s.x + vslideup. The second half then overwrites the top of the result.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py.
define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a1)
; RV32-NEXT: vle64.v v10, (a0)
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v9, v8, a2
; RV32-NEXT: vslide1down.vx v9, v9, a3
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vslideup.vi v10, v8, 2
; RV32-NEXT: vse64.v v10, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: vle64.v v10, (a0)
; RV64-NEXT: vmv.s.x v9, a2
; RV64-NEXT: vslideup.vi v8, v9, 1
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vslideup.vi v10, v8, 2
; RV64-NEXT: vse64.v v10, (a0)
; RV64-NEXT: ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  ; Concatenate the two halves, overwrite element 3 with %x, store back to %a.
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 3
  store <4 x i64> %ins, ptr %a
  ret void
}