With regard to overrunning, the LangRef (llvm/docs/LangRef.rst)
specifies:
(llvm.experimental.vector.insert)
Elements ``idx`` through (``idx`` + num_elements(``subvec``) - 1)
must be valid ``vec`` indices. If this condition cannot be determined
statically but is false at runtime, then the result vector is
undefined.
(llvm.experimental.vector.extract)
Elements ``idx`` through (``idx`` + num_elements(result_type) - 1)
must be valid vector indices. If this condition cannot be determined
statically but is false at runtime, then the result vector is
undefined.
For the non-mixed cases (e.g. inserting/extracting a scalable vector
into/from another scalable vector, or a fixed-length vector into/from
another fixed-length vector), it is possible to check statically
whether the above conditions hold. These checks were previously
missing from the verifier; instead, when the conditions were found to
be false, the result of the insertion/extraction was silently replaced
with undef.
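For example, the following fixed-into-fixed insertion (an illustrative
case, not taken from the patch, using the same intrinsic mangling as
the test below) statically overruns its destination and is now a
verifier error rather than being folded to undef:

  define <4 x i32> @overrun(<4 x i32> %vec, <2 x i32> %sv) {
    ; Elements 4 and 5 are not valid <4 x i32> indices.
    %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 4)
    ret <4 x i32> %v
  }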
With regard to invalid indices, the LangRef (llvm/docs/LangRef.rst)
specifies:
(llvm.experimental.vector.insert)
``idx`` represents the starting element number at which ``subvec``
will be inserted. ``idx`` must be a constant multiple of
``subvec``'s known minimum vector length.
(llvm.experimental.vector.extract)
The ``idx`` specifies the starting element number within ``vec``
from which a subvector is extracted. ``idx`` must be a constant
multiple of the known-minimum vector length of the result type.
Similarly, these conditions were not previously enforced by the
verifier. In some circumstances invalid indices were silently
permitted, and in others an undef was produced where a verifier error
would have been preferred.
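For example (again an illustrative, hypothetical case), an index that
is not a multiple of the subvector's known minimum length is now
rejected:

  define <4 x i32> @bad_idx(<4 x i32> %vec, <2 x i32> %sv) {
    ; idx 1 is not a multiple of num_elements(<2 x i32>) = 2.
    %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 1)
    ret <4 x i32> %v
  }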
This commit adds verifier checks to enforce the constraints above.
Differential Revision: https://reviews.llvm.org/D104468
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v28, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, mu
; CHECK-NEXT: vslideup.vi v8, v28, 0
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v28, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, mu
; CHECK-NEXT: vslideup.vi v8, v28, 2
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 2)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v28, (a0)
; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, mu
; CHECK-NEXT: vslideup.vi v8, v28, 6
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 6)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v28, (a0)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m4, tu, mu
; LMULMAX2-NEXT: vslideup.vi v8, v28, 0
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vle32.v v28, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v12, (a0)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, mu
; LMULMAX1-NEXT: vslideup.vi v8, v28, 0
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, mu
; LMULMAX1-NEXT: vslideup.vi v8, v12, 4
; LMULMAX1-NEXT: ret
  %sv = load <8 x i32>, <8 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v28, (a0)
; LMULMAX2-NEXT: vsetivli zero, 16, e32, m4, tu, mu
; LMULMAX2-NEXT: vslideup.vi v8, v28, 8
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vle32.v v28, (a0)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v12, (a0)
; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, mu
; LMULMAX1-NEXT: vslideup.vi v8, v28, 8
; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, mu
; LMULMAX1-NEXT: vslideup.vi v8, v12, 12
; LMULMAX1-NEXT: ret
  %sv = load <8 x i32>, <8 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_undef_v2i32_0(<2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_undef_v2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> undef, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define void @insert_v4i32_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_v2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v25, (a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v26, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT: vslideup.vi v26, v25, 0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <4 x i32>, <4 x i32>* %vp
  %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v4i32_v2i32_2(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_v2i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v25, (a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v26, (a0)
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT: vslideup.vi v26, v25, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <4 x i32>, <4 x i32>* %vp
  %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 2)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_undef_v2i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT: vle32.v v25, (a1)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT: vslideup.vi v26, v25, 0
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_0(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_0:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT: vle32.v v26, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v28, (a0)
; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, mu
; LMULMAX2-NEXT: vslideup.vi v28, v26, 0
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vse32.v v28, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_2(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_2:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT: vle32.v v26, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v28, (a0)
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, mu
; LMULMAX2-NEXT: vslideup.vi v28, v26, 2
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vse32.v v28, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_2:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_6:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT: vle32.v v26, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v28, (a0)
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; LMULMAX2-NEXT: vslideup.vi v28, v26, 6
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; LMULMAX2-NEXT: vse32.v v28, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_6:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_undef_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_undef_v2i32_6:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT: vle32.v v26, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vslideup.vi v28, v26, 6
; LMULMAX2-NEXT: vse32.v v28, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_v8i32_undef_v2i32_6:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT: vslideup.vi v26, v25, 2
; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v4i16_v2i16_0(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-LABEL: insert_v4i16_v2i16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT: vle16.v v26, (a1)
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
; CHECK-NEXT: vslideup.vi v25, v26, 0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
  %v = load <4 x i16>, <4 x i16>* %vp
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <4 x i16> @llvm.experimental.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 0)
  store <4 x i16> %c, <4 x i16>* %vp
  ret void
}

define void @insert_v4i16_v2i16_2(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-LABEL: insert_v4i16_v2i16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT: vle16.v v26, (a1)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, mu
; CHECK-NEXT: vslideup.vi v25, v26, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vse16.v v25, (a0)
; CHECK-NEXT: ret
  %v = load <4 x i16>, <4 x i16>* %vp
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <4 x i16> @llvm.experimental.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 2)
  store <4 x i16> %c, <4 x i16>* %vp
  ret void
}

define void @insert_v32i1_v8i1_0(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX2-LABEL: insert_v32i1_v8i1_0:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT: vle1.v v25, (a0)
; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX2-NEXT: vle1.v v26, (a1)
; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf4, tu, mu
; LMULMAX2-NEXT: vslideup.vi v25, v26, 0
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT: vse1.v v25, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_v32i1_v8i1_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT: vle1.v v25, (a0)
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX1-NEXT: vle1.v v26, (a1)
; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, tu, mu
; LMULMAX1-NEXT: vslideup.vi v25, v26, 0
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT: vse1.v v25, (a0)
; LMULMAX1-NEXT: ret
  %v = load <32 x i1>, <32 x i1>* %vp
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0)
  store <32 x i1> %c, <32 x i1>* %vp
  ret void
}

define void @insert_v32i1_v8i1_16(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX2-LABEL: insert_v32i1_v8i1_16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT: vle1.v v25, (a0)
; LMULMAX2-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX2-NEXT: vle1.v v26, (a1)
; LMULMAX2-NEXT: vsetivli zero, 3, e8, mf4, tu, mu
; LMULMAX2-NEXT: vslideup.vi v25, v26, 2
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT: vse1.v v25, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_v32i1_v8i1_16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a0, a0, 2
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT: vle1.v v25, (a0)
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX1-NEXT: vle1.v v26, (a1)
; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, tu, mu
; LMULMAX1-NEXT: vslideup.vi v25, v26, 0
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT: vse1.v v25, (a0)
; LMULMAX1-NEXT: ret
  %v = load <32 x i1>, <32 x i1>* %vp
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
  store <32 x i1> %c, <32 x i1>* %vp
  ret void
}

define void @insert_v8i1_v4i1_0(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-LABEL: insert_v8i1_v4i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle1.v v0, (a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vle1.v v27, (a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: vmerge.vim v26, v26, 1, v0
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
; CHECK-NEXT: vslideup.vi v25, v26, 0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmsne.vi v25, v25, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
  %v = load <8 x i1>, <8 x i1>* %vp
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 0)
  store <8 x i1> %c, <8 x i1>* %vp
  ret void
}

define void @insert_v8i1_v4i1_4(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-LABEL: insert_v8i1_v4i1_4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle1.v v0, (a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vle1.v v27, (a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: vmerge.vim v26, v26, 1, v0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
; CHECK-NEXT: vslideup.vi v25, v26, 4
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT: vmsne.vi v25, v25, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
  %v = load <8 x i1>, <8 x i1>* %vp
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 4)
  store <8 x i1> %c, <8 x i1>* %vp
  ret void
}

define <vscale x 2 x i16> @insert_nxv2i16_v2i16_0(<vscale x 2 x i16> %v, <2 x i16>* %svp) {
; CHECK-LABEL: insert_nxv2i16_v2i16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
; CHECK-NEXT: vslideup.vi v8, v25, 0
; CHECK-NEXT: ret
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <vscale x 2 x i16> @llvm.experimental.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 0)
  ret <vscale x 2 x i16> %c
}

define <vscale x 2 x i16> @insert_nxv2i16_v2i16_2(<vscale x 2 x i16> %v, <2 x i16>* %svp) {
; CHECK-LABEL: insert_nxv2i16_v2i16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT: vle16.v v25, (a0)
; CHECK-NEXT: vsetivli zero, 6, e16, mf2, tu, mu
; CHECK-NEXT: vslideup.vi v8, v25, 4
; CHECK-NEXT: ret
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <vscale x 2 x i16> @llvm.experimental.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 4)
  ret <vscale x 2 x i16> %c
}

define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
; CHECK-LABEL: insert_nxv2i1_v4i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vle1.v v27, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT: vmv.v.i v25, 0
; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: vmerge.vim v26, v26, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, mu
; CHECK-NEXT: vslideup.vi v25, v26, 0
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT: vmsne.vi v0, v25, 0
; CHECK-NEXT: ret
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 0)
  ret <vscale x 2 x i1> %c
}

define <vscale x 8 x i1> @insert_nxv8i1_v4i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v4i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, tu, mu
; CHECK-NEXT: vslideup.vi v0, v25, 0
; CHECK-NEXT: ret
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <vscale x 8 x i1> @llvm.experimental.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 0)
  ret <vscale x 8 x i1> %c
}

define <vscale x 8 x i1> @insert_nxv8i1_v8i1_16(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v8i1_16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle1.v v25, (a0)
; CHECK-NEXT: vsetivli zero, 3, e8, mf8, tu, mu
; CHECK-NEXT: vslideup.vi v0, v25, 2
; CHECK-NEXT: ret
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <vscale x 8 x i1> @llvm.experimental.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 16)
  ret <vscale x 8 x i1> %c
}

declare <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)

define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, mu
; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: ret
  %sv0 = load <2 x i64>, <2 x i64>* %psv0
  %sv1 = load <2 x i64>, <2 x i64>* %psv1
  %v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, mu
; CHECK-NEXT: vslideup.vi v16, v8, 2
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

; Check we don't mistakenly optimize this: we don't know whether this is
; inserted into the low or high split vector.
define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: addi a0, sp, 80
; CHECK-NEXT: vse64.v v25, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: add a2, a2, a0
; CHECK-NEXT: vl8re64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re64.v v16, (a2)
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

declare <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
declare <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)

declare <4 x i16> @llvm.experimental.vector.insert.v2i16.v4i16(<4 x i16>, <2 x i16>, i64)

declare <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)
declare <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32>, <2 x i32>, i64)

declare <vscale x 2 x i1> @llvm.experimental.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1>, <4 x i1>, i64)
declare <vscale x 8 x i1> @llvm.experimental.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1>, <8 x i1>, i64)

declare <vscale x 2 x i16> @llvm.experimental.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16>, <2 x i16>, i64)

declare <vscale x 8 x i32> @llvm.experimental.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)
declare <vscale x 8 x i32> @llvm.experimental.vector.insert.v4i32.nxv8i32(<vscale x 8 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i32> @llvm.experimental.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)