; This patch tries to get rid of the vsetvl implicit vl/vtype def-use chain and
; improve register allocation quality by moving the vsetvl insertion pass after
; RVV register allocation. It enables the following optimizations:
;   1. unblocking the scheduler's constraints by removing the vl/vtype def-use chain;
;   2. supporting RVV re-materialization;
;   3. supporting partial spill.
; This patch adds a new option `-riscv-vsetvl-after-rvv-regalloc=<1|0>` to
; control this feature; it defaults to disabled.
222 lines
8.1 KiB
LLVM
222 lines
8.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
|
|
; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
|
; CHECK-NEXT: vle8.v v8, (a0)
|
|
; CHECK-NEXT: vle8.v v9, (a1)
|
|
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
|
|
; CHECK-NEXT: vmv.s.x v8, a2
|
|
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
|
; CHECK-NEXT: vslideup.vi v8, v9, 2
|
|
; CHECK-NEXT: vse8.v v8, (a0)
|
|
; CHECK-NEXT: ret
|
|
%v1 = load <2 x i8>, ptr %a
|
|
%v2 = load <2 x i8>, ptr %b
|
|
%concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i8> %concat, i8 %x, i32 0
|
|
store <4 x i8> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
|
|
; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
|
; CHECK-NEXT: vle8.v v8, (a0)
|
|
; CHECK-NEXT: vle8.v v9, (a1)
|
|
; CHECK-NEXT: vmv.s.x v10, a2
|
|
; CHECK-NEXT: vslideup.vi v8, v10, 1
|
|
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
|
; CHECK-NEXT: vslideup.vi v8, v9, 2
|
|
; CHECK-NEXT: vse8.v v8, (a0)
|
|
; CHECK-NEXT: ret
|
|
%v1 = load <2 x i8>, ptr %a
|
|
%v2 = load <2 x i8>, ptr %b
|
|
%concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i8> %concat, i8 %x, i32 1
|
|
store <4 x i8> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
|
|
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
|
; CHECK-NEXT: vle8.v v8, (a1)
|
|
; CHECK-NEXT: vle8.v v9, (a0)
|
|
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
|
|
; CHECK-NEXT: vmv.s.x v8, a2
|
|
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
|
; CHECK-NEXT: vslideup.vi v9, v8, 2
|
|
; CHECK-NEXT: vse8.v v9, (a0)
|
|
; CHECK-NEXT: ret
|
|
%v1 = load <2 x i8>, ptr %a
|
|
%v2 = load <2 x i8>, ptr %b
|
|
%concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i8> %concat, i8 %x, i32 2
|
|
store <4 x i8> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
|
|
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
|
|
; CHECK-NEXT: vle8.v v8, (a1)
|
|
; CHECK-NEXT: vle8.v v9, (a0)
|
|
; CHECK-NEXT: vmv.s.x v10, a2
|
|
; CHECK-NEXT: vslideup.vi v8, v10, 1
|
|
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
|
|
; CHECK-NEXT: vslideup.vi v9, v8, 2
|
|
; CHECK-NEXT: vse8.v v9, (a0)
|
|
; CHECK-NEXT: ret
|
|
%v1 = load <2 x i8>, ptr %a
|
|
%v2 = load <2 x i8>, ptr %b
|
|
%concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i8> %concat, i8 %x, i32 3
|
|
store <4 x i8> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
|
|
; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV32-NEXT: vle64.v v8, (a0)
|
|
; RV32-NEXT: vle64.v v10, (a1)
|
|
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
|
|
; RV32-NEXT: vslide1down.vx v8, v8, a2
|
|
; RV32-NEXT: vslide1down.vx v8, v8, a3
|
|
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV32-NEXT: vslideup.vi v8, v10, 2
|
|
; RV32-NEXT: vse64.v v8, (a0)
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV64-NEXT: vle64.v v8, (a0)
|
|
; RV64-NEXT: vle64.v v10, (a1)
|
|
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
|
|
; RV64-NEXT: vmv.s.x v8, a2
|
|
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV64-NEXT: vslideup.vi v8, v10, 2
|
|
; RV64-NEXT: vse64.v v8, (a0)
|
|
; RV64-NEXT: ret
|
|
%v1 = load <2 x i64>, ptr %a
|
|
%v2 = load <2 x i64>, ptr %b
|
|
%concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i64> %concat, i64 %x, i32 0
|
|
store <4 x i64> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
|
|
; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV32-NEXT: vle64.v v8, (a0)
|
|
; RV32-NEXT: vle64.v v10, (a1)
|
|
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
|
|
; RV32-NEXT: vslide1down.vx v9, v8, a2
|
|
; RV32-NEXT: vslide1down.vx v9, v9, a3
|
|
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV32-NEXT: vslideup.vi v8, v9, 1
|
|
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV32-NEXT: vslideup.vi v8, v10, 2
|
|
; RV32-NEXT: vse64.v v8, (a0)
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV64-NEXT: vle64.v v8, (a0)
|
|
; RV64-NEXT: vle64.v v10, (a1)
|
|
; RV64-NEXT: vmv.s.x v9, a2
|
|
; RV64-NEXT: vslideup.vi v8, v9, 1
|
|
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV64-NEXT: vslideup.vi v8, v10, 2
|
|
; RV64-NEXT: vse64.v v8, (a0)
|
|
; RV64-NEXT: ret
|
|
%v1 = load <2 x i64>, ptr %a
|
|
%v2 = load <2 x i64>, ptr %b
|
|
%concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i64> %concat, i64 %x, i32 1
|
|
store <4 x i64> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
|
|
; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV32-NEXT: vle64.v v8, (a1)
|
|
; RV32-NEXT: vle64.v v10, (a0)
|
|
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
|
|
; RV32-NEXT: vslide1down.vx v8, v8, a2
|
|
; RV32-NEXT: vslide1down.vx v8, v8, a3
|
|
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV32-NEXT: vslideup.vi v10, v8, 2
|
|
; RV32-NEXT: vse64.v v10, (a0)
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV64-NEXT: vle64.v v8, (a1)
|
|
; RV64-NEXT: vle64.v v10, (a0)
|
|
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
|
|
; RV64-NEXT: vmv.s.x v8, a2
|
|
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV64-NEXT: vslideup.vi v10, v8, 2
|
|
; RV64-NEXT: vse64.v v10, (a0)
|
|
; RV64-NEXT: ret
|
|
%v1 = load <2 x i64>, ptr %a
|
|
%v2 = load <2 x i64>, ptr %b
|
|
%concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i64> %concat, i64 %x, i32 2
|
|
store <4 x i64> %ins, ptr %a
|
|
ret void
|
|
}
|
|
|
|
define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
|
|
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV32-NEXT: vle64.v v8, (a1)
|
|
; RV32-NEXT: vle64.v v10, (a0)
|
|
; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
|
|
; RV32-NEXT: vslide1down.vx v9, v8, a2
|
|
; RV32-NEXT: vslide1down.vx v9, v9, a3
|
|
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV32-NEXT: vslideup.vi v8, v9, 1
|
|
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV32-NEXT: vslideup.vi v10, v8, 2
|
|
; RV32-NEXT: vse64.v v10, (a0)
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
|
|
; RV64-NEXT: vle64.v v8, (a1)
|
|
; RV64-NEXT: vle64.v v10, (a0)
|
|
; RV64-NEXT: vmv.s.x v9, a2
|
|
; RV64-NEXT: vslideup.vi v8, v9, 1
|
|
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
|
; RV64-NEXT: vslideup.vi v10, v8, 2
|
|
; RV64-NEXT: vse64.v v10, (a0)
|
|
; RV64-NEXT: ret
|
|
%v1 = load <2 x i64>, ptr %a
|
|
%v2 = load <2 x i64>, ptr %b
|
|
%concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%ins = insertelement <4 x i64> %concat, i64 %x, i32 3
|
|
store <4 x i64> %ins, ptr %a
|
|
ret void
|
|
}
|