If we know VLEN is a multiple of 16, we don't need any alignment padding. I wrote the code so that it generates the minimum amount of padding when the stack alignment is 32 or larger, or when RVVBitsPerBlock is smaller than half the stack alignment.
2578 lines
108 KiB
LLVM
2578 lines
108 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v,+m \
|
|
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
|
|
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zvfbfmin,+v,+m \
|
|
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
|
|
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v,+m \
|
|
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
|
|
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zvfbfmin,+v,+m \
|
|
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
|
|
|
|
declare void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 1 x i8> through a vector of pointers.
; Expected code: one vsetvli taking its AVL from a0, then a single masked
; indexed-ordered store off a zero base (vsoxei32 on riscv32, vsoxei64 on riscv64).
define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv1i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv1i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 2 x i8> through a vector of pointers.
; Expected code: vsetvli (e8, mf4) with AVL from a0, then one masked indexed
; store; the pointer vector is read from v9 on riscv32 and from v10 on riscv64.
define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Truncating scatter: i16 elements are truncated to i8 and then scattered.
; Expected code: one vnsrl.wi narrowing step under a vsetvli whose AVL operand
; is the zero register, followed by a second vsetvli that takes the AVL from a0
; for the masked indexed store.
define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
%tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
|
|
call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Truncating scatter: i32 elements are truncated to i8 and then scattered.
; Expected code: two vnsrl.wi narrowing steps (to e16, then to e8), the second
; reached via "vsetvli zero, zero" which keeps the current VL, and finally a
; vsetvli with AVL from a0 for the masked indexed store.
define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
%tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
|
|
call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Truncating scatter: i64 elements are truncated to i8 and then scattered.
; Expected code: three vnsrl.wi narrowing steps (e32, e16, e8). The first one
; writes a temporary register (v11 on riscv32, v12 on riscv64) instead of
; narrowing in place; the pointer vector stays in v10 for the final store.
define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v11, v8, 0
|
|
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v11, 0
|
|
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v12, v8, 0
|
|
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v12, 0
|
|
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
%tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
|
|
call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 4 x i8> through a vector of pointers.
; Expected code: vsetvli (e8, mf2) with AVL from a0, then one masked indexed
; store with the pointers in v10 on riscv32 and v12 on riscv64.
define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv4i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv4i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; vp.scatter of <vscale x 4 x i8> with an all-true mask (splat (i1 1)).
; The expected indexed store carries no v0.t operand, i.e. it is emitted
; unmasked; VL is still taken from a0.
define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_truemask_nxv4i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_truemask_nxv4i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 8 x i8> through a vector of pointers.
; Expected code: vsetvli (e8, m1) with AVL from a0, then one masked indexed
; store with the pointers in v12 on riscv32 and v16 on riscv64.
define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv8i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e8, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv8i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e8, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i8 elements to base + i8 index (GEP over i8, so no scaling).
; Expected code: the indices are sign-extended to the index width (vsext.vf4 to
; e32 on riscv32, vsext.vf8 to e64 on riscv64) and used as offsets from the
; base in a0; the AVL for the store comes from a1.
define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v9
|
|
; RV32-NEXT: vsetvli zero, a1, e8, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v9
|
|
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
|
|
call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 1 x i16> through a vector of pointers.
; Expected code: vsetvli (e16, mf4) with AVL from a0, then one masked indexed
; store off a zero base with the pointers in v9 on both targets.
define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv1i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv1i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 2 x i16> through a vector of pointers.
; Expected code: vsetvli (e16, mf2) with AVL from a0, then one masked indexed
; store with the pointers in v9 on riscv32 and v10 on riscv64.
define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Truncating scatter: i32 elements are truncated to i16 and then scattered.
; Expected code: one vnsrl.wi narrowing step under a vsetvli whose AVL operand
; is the zero register, then a vsetvli with AVL from a0 for the masked store.
define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v8, 0
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
%tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
|
|
call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Truncating scatter: i64 elements are truncated to i16 and then scattered.
; Expected code: two vnsrl.wi narrowing steps (e32, then e16). The first writes
; a temporary register (v11 on riscv32, v12 on riscv64); the pointer vector
; stays in v10 for the final masked store.
define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v11, v8, 0
|
|
; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v8, v11, 0
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v12, v8, 0
|
|
; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v8, v12, 0
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
%tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
|
|
call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 4 x i16> through a vector of pointers.
; Expected code: vsetvli (e16, m1) with AVL from a0, then one masked indexed
; store with the pointers in v10 on riscv32 and v12 on riscv64.
define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv4i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv4i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; vp.scatter of <vscale x 4 x i16> with an all-true mask (splat (i1 1)).
; The expected indexed store carries no v0.t operand, i.e. it is emitted
; unmasked; VL is still taken from a0.
define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_truemask_nxv4i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_truemask_nxv4i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 8 x i16> through a vector of pointers.
; Expected code: vsetvli (e16, m2) with AVL from a0, then one masked indexed
; store with the pointers in v12 on riscv32 and v16 on riscv64.
define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i16 elements to base + i8 index (GEP over i16).
; Expected code: the indices are sign-extended to the index width (vsext.vf4 /
; vsext.vf8) and doubled with vadd.vv to form byte offsets from the base in a0;
; the AVL for the store comes from a1.
define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v10
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i16 elements where the i8 indices are explicitly sign-extended to
; i16 in the IR before the GEP. The expected code is the same sequence as for
; the variant without the explicit sext: vsext to the index width, vadd.vv to
; double, then the masked indexed store.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v10
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i16 elements where the i8 indices are zero-extended to i16 before
; the GEP. Expected code on both targets: the extend and the doubling are done
; by a single vwaddu.vv at e8, and the store uses 16-bit indices (vsoxei16)
; rather than pointer-width ones.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma
|
|
; RV32-NEXT: vwaddu.vv v12, v10, v10
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma
|
|
; RV64-NEXT: vwaddu.vv v12, v10, v10
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i16 elements to base + i16 index (GEP over i16).
; Expected code differs per target: riscv32 extends and doubles the indices in
; one widening vwadd.vv (e16 source), while riscv64 uses vsext.vf4 to e64
; followed by vadd.vv before the 64-bit indexed store.
define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
|
|
; RV32-NEXT: vwadd.vv v12, v10, v10
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
|
|
; Scatter of i16 elements whose i16 indices are widened to i32 by a vp.sext
; that uses the same mask and EVL as the scatter.
; Expected code: riscv32 does the extend and doubling in one masked vwadd.vv
; under the EVL in a1; riscv64 emits a masked vsext.vf2 followed by an unmasked
; vwadd.vv (under a vsetvli with the zero register as AVL) to reach 64-bit
; offsets.
define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vwadd.vv v12, v10, v10, v0.t
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsext.vf2 v12, v10, v0.t
|
|
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV64-NEXT: vwadd.vv v16, v12, v12
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
|
|
; Scatter of i16 elements whose i16 indices are widened to i32 by a vp.zext
; that uses the same mask and EVL as the scatter.
; Expected code: riscv32 does the extend and doubling in one masked vwaddu.vv;
; riscv64 emits a masked vzext.vf2 and then an unmasked signed vwadd.vv to
; widen the i32 values to 64-bit offsets.
define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vwaddu.vv v12, v10, v10, v0.t
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v12, v10, v0.t
|
|
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV64-NEXT: vwadd.vv v16, v12, v12
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
|
|
; Scatter of i16 elements whose i32 indices are widened to i64 by a vp.sext
; that uses the same mask and EVL as the scatter.
; Expected code: riscv32 performs the masked vsext.vf2 to e64, narrows back to
; e32 with vnsrl.wi, doubles with vadd.vv and stores with 32-bit indices;
; riscv64 does the extend and doubling in one masked vwadd.vv and then only
; switches SEW/LMUL ("vsetvli zero, zero") before the store.
define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v16, v12, v0.t
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v12, v16, 0
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vwadd.vv v16, v12, v12, v0.t
|
|
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
|
|
; Scatter of i16 elements whose i32 indices are widened to i64 by a vp.zext
; that uses the same mask and EVL as the scatter.
; Expected code: riscv32 performs the masked vzext.vf2 to e64, narrows back to
; e32 with vnsrl.wi, doubles with vadd.vv and stores with 32-bit indices;
; riscv64 does the extend and doubling in one masked vwaddu.vv and then only
; switches SEW/LMUL ("vsetvli zero, zero") before the store.
define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v16, v12, v0.t
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v12, v16, 0
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vwaddu.vv v16, v12, v12, v0.t
|
|
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
|
|
%ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 1 x i32> through a vector of pointers.
; Expected code: vsetvli (e32, mf2) with AVL from a0, then one masked indexed
; store off a zero base with the pointers in v9 on both targets.
define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv1i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv1i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 2 x i32> through a vector of pointers.
; Expected code: vsetvli (e32, m1) with AVL from a0, then one masked indexed
; store with the pointers in v9 on riscv32 and v10 on riscv64.
define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Truncating scatter: i64 elements are truncated to i32 and then scattered.
; Expected code: a single vnsrl.wi narrowing step into a temporary register
; (v11 on riscv32, v12 on riscv64), which is then the data operand of the
; masked indexed store; the pointer vector is read from v10.
define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v11, v8, 0
|
|
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v11, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
|
|
; RV64-NEXT: vnsrl.wi v12, v8, 0
|
|
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v12, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
%tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
|
|
call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 4 x i32> through a vector of pointers.
; Expected code: vsetvli (e32, m2) with AVL from a0, then one masked indexed
; store with the pointers in v10 on riscv32 and v12 on riscv64.
define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv4i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv4i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; vp.scatter of <vscale x 4 x i32> with an all-true mask (splat (i1 1)).
; The expected indexed store carries no v0.t operand, i.e. it is emitted
; unmasked; VL is still taken from a0.
define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_truemask_nxv4i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_truemask_nxv4i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
|
|
|
|
; Masked vp.scatter of <vscale x 8 x i32> through a vector of pointers.
; Expected code: vsetvli (e32, m4) with AVL from a0, then one masked indexed
; store with the pointers in v12 on riscv32 and v16 on riscv64.
define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i32 elements to base + i8 index (GEP over i32).
; Expected code: the indices are sign-extended to the index width (vsext.vf4 /
; vsext.vf8) and shifted left by 2 (vsll.vi) to form byte offsets from the
; base in a0; the AVL for the store comes from a1.
define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v16, v12
|
|
; RV32-NEXT: vsll.vi v12, v16, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v12
|
|
; RV64-NEXT: vsll.vi v16, v16, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i32 elements where the i8 indices are explicitly sign-extended to
; i32 in the IR before the GEP. The expected code is the same sequence as for
; the variant without the explicit sext: vsext to the index width, vsll.vi by
; 2, then the masked indexed store.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v16, v12
|
|
; RV32-NEXT: vsll.vi v12, v16, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v12
|
|
; RV64-NEXT: vsll.vi v16, v16, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i32 elements where the i8 indices are zero-extended to i32 before
; the GEP. Expected code on both targets: the indices are only widened to e16
; (vzext.vf2), shifted left by 2, and the store uses 16-bit indices (vsoxei16)
; rather than pointer-width ones.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v14, v12
|
|
; RV32-NEXT: vsll.vi v12, v14, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v14, v12
|
|
; RV64-NEXT: vsll.vi v12, v14, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of i32 elements to base + i16 index (GEP over i32).
; Expected code: the indices are sign-extended to the index width (vsext.vf2 to
; e32 on riscv32, vsext.vf4 to e64 on riscv64) and shifted left by 2 to form
; byte offsets from the base in a0; the AVL for the store comes from a1.
define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v16, v12
|
|
; RV32-NEXT: vsll.vi v12, v16, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v16, v12
|
|
; RV64-NEXT: vsll.vi v16, v16, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i32> through a GEP on %base whose i16
; indices are explicitly sign-extended to i32 in the IR. Expected code:
; riscv32 uses vsext.vf2 to e32 with vsoxei32.v; riscv64 uses vsext.vf4 to e64
; with vsoxei64.v. Both shift the indices left by 2.
define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v16, v12
|
|
; RV32-NEXT: vsll.vi v12, v16, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v16, v12
|
|
; RV64-NEXT: vsll.vi v16, v16, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i32> through a GEP on %base whose i16
; indices are zero-extended to i32 in the IR. On both riscv32 and riscv64 the
; expected code is vzext.vf2 to e32, a left shift by 2, and vsoxei32.v.
define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v16, v12
|
|
; RV32-NEXT: vsll.vi v12, v16, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v16, v12
|
|
; RV64-NEXT: vsll.vi v12, v16, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i32> through a GEP on %base indexed by an
; i32 vector. Expected code: riscv32 shifts the indices left by 2 in place and
; uses vsoxei32.v; riscv64 first sign-extends them to e64 (vsext.vf2), shifts,
; and uses vsoxei64.v.
define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v12, v12, 2
|
|
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf2 v16, v12
|
|
; RV64-NEXT: vsll.vi v16, v16, 2
|
|
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
|
|
call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 1 x i64> to a vector of pointers. Expected
; code is a vsetvli at e64/m1 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv1i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv1i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 2 x i64> to a vector of pointers. Expected
; code is a vsetvli at e64/m2 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 4 x i64> to a vector of pointers. Expected
; code is a vsetvli at e64/m4 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv4i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv4i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; VP scatter of <vscale x 4 x i64> whose mask operand is the constant
; splat (i1 1). The expected indexed store carries no v0.t mask operand on
; either target; only the vsetvli with the AVL from a0 limits the operation.
define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_truemask_nxv4i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_truemask_nxv4i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> to a vector of pointers. Expected
; code is a vsetvli at e64/m8 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base that takes the
; i8 index vector directly. Expected code: riscv32 sign-extends to e32
; (vsext.vf4) and uses vsoxei32.v; riscv64 sign-extends to e64 (vsext.vf8) and
; uses vsoxei64.v. Both shift the indices left by 3.
define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base whose i8
; indices are explicitly sign-extended to i64 in the IR. Expected code:
; riscv32 extends only to e32 (vsext.vf4) and uses vsoxei32.v; riscv64 extends
; to e64 (vsext.vf8) and uses vsoxei64.v. Both shift the indices left by 3.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base whose i8
; indices are zero-extended to i64 in the IR. On both riscv32 and riscv64 the
; expected code widens the indices only to 16 bits (vzext.vf2 at e16), shifts
; them left by 3, and stores with vsoxei16.v.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v18, v16
|
|
; RV32-NEXT: vsll.vi v16, v18, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v18, v16
|
|
; RV64-NEXT: vsll.vi v16, v18, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base that takes the
; i16 index vector directly. Expected code: riscv32 sign-extends to e32
; (vsext.vf2) and uses vsoxei32.v; riscv64 sign-extends to e64 (vsext.vf4) and
; uses vsoxei64.v. Both shift the indices left by 3.
define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base whose i16
; indices are explicitly sign-extended to i64 in the IR. Expected code:
; riscv32 extends only to e32 (vsext.vf2) and uses vsoxei32.v; riscv64 extends
; to e64 (vsext.vf4) and uses vsoxei64.v. Both shift the indices left by 3.
define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base whose i16
; indices are zero-extended to i64 in the IR. On both riscv32 and riscv64 the
; expected code widens the indices only to 32 bits (vzext.vf2 at e32), shifts
; them left by 3, and stores with vsoxei32.v.
define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v20, v16
|
|
; RV64-NEXT: vsll.vi v16, v20, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base that takes the
; i32 index vector directly. Expected code: riscv32 shifts the i32 indices
; left by 3 in place and uses vsoxei32.v; riscv64 sign-extends them to e64
; (vsext.vf2), shifts, and uses vsoxei64.v.
define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v16, v16, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf2 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base whose i32
; indices are explicitly sign-extended to i64 in the IR. Expected code:
; riscv32 emits no extend, shifts the i32 indices left by 3 and uses
; vsoxei32.v; riscv64 uses vsext.vf2 to e64, shifts, and uses vsoxei64.v.
define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v16, v16, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf2 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base whose i32
; indices are zero-extended to i64 in the IR. Expected code: riscv32 emits no
; extend, shifts the i32 indices left by 3 and uses vsoxei32.v; riscv64 uses
; vzext.vf2 to e64, shifts, and uses vsoxei64.v.
define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v16, v16, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x i64> through a GEP on %base indexed by an
; i64 vector. Expected code: riscv32 narrows the indices to 32 bits
; (vnsrl.wi ..., 0 at e32), shifts left by 3 and uses vsoxei32.v; riscv64
; shifts the i64 indices in place and uses vsoxei64.v.
define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v24, v16, 0
|
|
; RV32-NEXT: vsll.vi v16, v24, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsll.vi v16, v16, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
|
|
call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 1 x bfloat> to a vector of pointers. Expected
; code is a vsetvli at e16/mf4 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv1bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv1bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 2 x bfloat> to a vector of pointers. Expected
; code is a vsetvli at e16/mf2 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v (pointers in v9) on riscv32,
; vsoxei64.v (pointers in v10) on riscv64.
define void @vpscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 4 x bfloat> to a vector of pointers. Expected
; code is a vsetvli at e16/m1 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v (pointers in v10) on riscv32,
; vsoxei64.v (pointers in v12) on riscv64.
define void @vpscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv4bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv4bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; VP scatter of <vscale x 4 x bfloat> whose mask operand is the constant
; splat (i1 1). The expected indexed store carries no v0.t mask operand on
; either target; only the vsetvli with the AVL from a0 limits the operation.
define void @vpscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_truemask_nxv4bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_truemask_nxv4bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 8 x bfloat> to a vector of pointers. Expected
; code is a vsetvli at e16/m2 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v (pointers in v12) on riscv32,
; vsoxei64.v (pointers in v16) on riscv64.
define void @vpscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv8bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv8bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x bfloat> through a GEP on %base that takes
; the i8 index vector directly. Expected code: riscv32 sign-extends to e32
; (vsext.vf4) and uses vsoxei32.v; riscv64 sign-extends to e64 (vsext.vf8) and
; uses vsoxei64.v. Both double the indices with vadd.vv of the vector to itself.
define void @vpscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v10
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
|
|
call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x bfloat> through a GEP on %base whose i8
; indices are explicitly sign-extended to i16 in the IR. Expected code:
; riscv32 uses vsext.vf4 to e32 with vsoxei32.v; riscv64 uses vsext.vf8 to e64
; with vsoxei64.v. Both double the indices with vadd.vv of the vector to itself.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v10
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
|
|
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
|
|
call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x bfloat> through a GEP on %base whose i8
; indices are zero-extended to i16 in the IR. On both riscv32 and riscv64 the
; expected code is a single unsigned widening add of the index vector with
; itself (vwaddu.vv at e8/m1) followed by vsoxei16.v.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma
|
|
; RV32-NEXT: vwaddu.vv v12, v10, v10
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma
|
|
; RV64-NEXT: vwaddu.vv v12, v10, v10
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
|
|
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
|
|
call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x bfloat> through a GEP on %base indexed by
; an i16 vector. Expected code: riscv32 uses a signed widening add of the index
; vector with itself (vwadd.vv at e16/m2) and vsoxei32.v; riscv64 sign-extends
; to e64 (vsext.vf4), doubles with vadd.vv, and uses vsoxei64.v.
define void @vpscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8bf16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
|
|
; RV32-NEXT: vwadd.vv v12, v10, v10
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8bf16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
|
|
call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 1 x half> to a vector of pointers. Expected
; code is a vsetvli at e16/mf4 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv1f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv1f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 2 x half> to a vector of pointers. Expected
; code is a vsetvli at e16/mf2 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v (pointers in v9) on riscv32,
; vsoxei64.v (pointers in v10) on riscv64.
define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv2f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv2f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 4 x half> to a vector of pointers. Expected
; code is a vsetvli at e16/m1 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v (pointers in v10) on riscv32,
; vsoxei64.v (pointers in v12) on riscv64.
define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv4f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv4f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; VP scatter of <vscale x 4 x half> whose mask operand is the constant
; splat (i1 1). The expected indexed store carries no v0.t mask operand on
; either target; only the vsetvli with the AVL from a0 limits the operation.
define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_truemask_nxv4f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v10
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_truemask_nxv4f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v12
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
declare void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
|
|
|
|
; Masked VP scatter of <vscale x 8 x half> to a vector of pointers. Expected
; code is a vsetvli at e16/m2 with the AVL taken from a0, followed by a masked
; indexed store with a zero base: vsoxei32.v (pointers in v12) on riscv32,
; vsoxei64.v (pointers in v16) on riscv64.
define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv8f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv8f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x half> through a GEP on %base that takes
; the i8 index vector directly. Expected code: riscv32 sign-extends to e32
; (vsext.vf4) and uses vsoxei32.v; riscv64 sign-extends to e64 (vsext.vf8) and
; uses vsoxei64.v. Both double the indices with vadd.vv of the vector to itself.
define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v10
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
|
|
call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked VP scatter of <vscale x 8 x half> through a GEP on %base whose i8
; indices are explicitly sign-extended to i16 in the IR. Expected code:
; riscv32 uses vsext.vf4 to e32 with vsoxei32.v; riscv64 uses vsext.vf8 to e64
; with vsoxei64.v. Both double the indices with vadd.vv of the vector to itself.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf4 v12, v10
|
|
; RV32-NEXT: vadd.vv v12, v12, v12
|
|
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf8 v16, v10
|
|
; RV64-NEXT: vadd.vv v16, v16, v16
|
|
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
|
|
%ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
|
|
call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Masked vp.scatter of nxv8f16 where the i8 indices are zero-extended to i16
; in IR before the GEP. On both targets the checks expect a single widening
; unsigned add of the index with itself (vwaddu.vv at e8/m1), which yields the
; doubled 16-bit offset, followed by a masked vsoxei16.v at e16/m2.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; RV32-NEXT: vwaddu.vv v12, v10, v10
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; RV64-NEXT: vwaddu.vv v12, v10, v10
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f16 through a GEP on %base with i16 indices.
; Expected lowering on riscv32: one widening signed add of the index with
; itself (vwadd.vv at e16/m2) produces the doubled 32-bit offset for
; vsoxei32.v. On riscv64: vsext.vf4 to 64 bits, vadd.vv to double, then
; vsoxei64.v. The store itself runs masked at e16/m2 on both targets.
define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vwadd.vv v12, v10, v10
; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f16:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vadd.vv v16, v16, v16
; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 1 x float> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

; Masked vp.scatter of nxv1f32 to a vector of pointers. The checks expect the
; pointer vector to be used directly as the index operand against a zero base
; (vsoxei32.v on riscv32, vsoxei64.v on riscv64) at e32/mf2.
define void @vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv1f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 2 x float> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

; Masked vp.scatter of nxv2f32 to a vector of pointers at e32/m1. The checks
; expect the pointer operand in v9 on riscv32 (vsoxei32.v) and in v10 on
; riscv64 (vsoxei64.v), both against a zero base.
define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 4 x float> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

; Masked vp.scatter of nxv4f32 to a vector of pointers at e32/m2. The checks
; expect the pointer operand in v10 on riscv32 (vsoxei32.v) and in v12 on
; riscv64 (vsoxei64.v), both against a zero base.
define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter of nxv4f32 with an all-ones mask (splat (i1 1)). The checks
; expect the mask to be dropped: the indexed-ordered store is emitted without
; a v0.t operand, still at e32/m2 against a zero base.
define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 8 x float> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

; Masked vp.scatter of nxv8f32 to a vector of pointers at e32/m4. The checks
; expect the pointer operand in v12 on riscv32 (vsoxei32.v) and in v16 on
; riscv64 (vsoxei64.v), both against a zero base.
define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 through a GEP on %base with raw i8 indices.
; Expected lowering: sign-extend the indices to pointer width (vsext.vf4 on
; riscv32, vsext.vf8 on riscv64), shift left by 2 for the 4-byte element
; size, then a masked indexed-ordered store at e32/m4.
define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 where the i8 indices are explicitly
; sign-extended to i32 in IR before the GEP. The checks expect vsext.vf4 on
; riscv32 and vsext.vf8 on riscv64 (directly to pointer width), a shift left
; by 2, then a masked indexed-ordered store at e32/m4.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 where the i8 indices are zero-extended to i32
; in IR before the GEP. On both targets the checks expect the indices to be
; widened only to 16 bits (vzext.vf2 at e16/m2), shifted left by 2, and used
; with vsoxei16.v for the masked store at e32/m4.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v14, v12
; RV32-NEXT: vsll.vi v12, v14, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v14, v12
; RV64-NEXT: vsll.vi v12, v14, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 through a GEP on %base with raw i16 indices.
; Expected lowering: sign-extend the indices to pointer width (vsext.vf2 on
; riscv32, vsext.vf4 on riscv64), shift left by 2 for the 4-byte element
; size, then a masked indexed-ordered store at e32/m4.
define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 where the i16 indices are explicitly
; sign-extended to i32 in IR before the GEP. The checks expect vsext.vf2 on
; riscv32 and vsext.vf4 on riscv64 (directly to pointer width), a shift left
; by 2, then a masked indexed-ordered store at e32/m4.
define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 where the i16 indices are zero-extended to i32
; in IR before the GEP. On both targets, riscv64 included, the checks expect
; a 32-bit index: vzext.vf2 at e32/m4, shift left by 2, then a masked
; vsoxei32.v at e32/m4.
define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v16, v12
; RV64-NEXT: vsll.vi v12, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f32 through a GEP on %base with i32 indices.
; Expected lowering on riscv32: the indices are already pointer width, so only
; a shift left by 2 precedes vsoxei32.v. On riscv64: vsext.vf2 to 64 bits,
; shift left by 2, then vsoxei64.v. The store runs masked at e32/m4.
define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 1 x double> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

; Masked vp.scatter of nxv1f64 to a vector of pointers at e64/m1. The checks
; expect the pointer vector (v9 on both targets) to be used directly as the
; index operand against a zero base: vsoxei32.v on riscv32, vsoxei64.v on
; riscv64.
define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 2 x double> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

; Masked vp.scatter of nxv2f64 to a vector of pointers at e64/m2. The checks
; expect the pointer vector (v10 on both targets) as the index operand against
; a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 4 x double> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

; Masked vp.scatter of nxv4f64 to a vector of pointers at e64/m4. The checks
; expect the pointer vector (v12 on both targets) as the index operand against
; a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter of nxv4f64 with an all-ones mask (splat (i1 1)). The checks
; expect the mask to be dropped: the indexed-ordered store is emitted without
; a v0.t operand, still at e64/m4 against a zero base.
define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 6 x double> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double>, <vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

; Masked vp.scatter of nxv6f64 (a non-power-of-two element count) to a vector
; of pointers. The checks expect it to be emitted at e64/m8 with the pointer
; vector in v16 as the index operand against a zero base: vsoxei32.v on
; riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 through a GEP on %base with raw i8 indices.
; Expected lowering: sign-extend the indices to pointer width (vsext.vf4 on
; riscv32, vsext.vf8 on riscv64), shift left by 3 for the 8-byte element
; size, then a masked indexed-ordered store at e64/m8.
define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 where the i8 indices are explicitly
; sign-extended to i64 in IR before the GEP. The checks expect riscv32 to
; extend only to 32 bits (vsext.vf4, vsoxei32.v) and riscv64 to 64 bits
; (vsext.vf8, vsoxei64.v); both shift left by 3 and store masked at e64/m8.
define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 where the i8 indices are zero-extended to i64
; in IR before the GEP. On both targets the checks expect the indices to be
; widened only to 16 bits (vzext.vf2 at e16/m2), shifted left by 3, and used
; with vsoxei16.v for the masked store at e64/m8.
define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 through a GEP on %base with raw i16 indices.
; Expected lowering: sign-extend the indices to pointer width (vsext.vf2 on
; riscv32, vsext.vf4 on riscv64), shift left by 3 for the 8-byte element
; size, then a masked indexed-ordered store at e64/m8.
define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 where the i16 indices are explicitly
; sign-extended to i64 in IR before the GEP. The checks expect riscv32 to
; extend only to 32 bits (vsext.vf2, vsoxei32.v) and riscv64 to 64 bits
; (vsext.vf4, vsoxei64.v); both shift left by 3 and store masked at e64/m8.
define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 where the i16 indices are zero-extended to i64
; in IR before the GEP. On both targets, riscv64 included, the checks expect
; a 32-bit index: vzext.vf2 at e32/m4, shift left by 3, then a masked
; vsoxei32.v at e64/m8.
define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vzext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV64-NEXT: vzext.vf2 v20, v16
; RV64-NEXT: vsll.vi v16, v20, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 through a GEP on %base with raw i32 indices.
; Expected lowering on riscv32: the indices are already pointer width, so only
; a shift left by 3 precedes vsoxei32.v. On riscv64: vsext.vf2 to 64 bits,
; shift left by 3, then vsoxei64.v. The store runs masked at e64/m8.
define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 where the i32 indices are explicitly
; sign-extended to i64 in IR before the GEP. The checks expect riscv32 to skip
; the extension entirely (shift left by 3, vsoxei32.v) and riscv64 to use
; vsext.vf2, shift left by 3, then vsoxei64.v. Store is masked at e64/m8.
define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 where the i32 indices are zero-extended to i64
; in IR before the GEP. The checks expect riscv32 to skip the extension
; (shift left by 3, vsoxei32.v) and riscv64 to use vzext.vf2 at e64/m8, shift
; left by 3, then vsoxei64.v. Store is masked at e64/m8.
define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv6f64 through a GEP on %base with i64 indices.
; Expected lowering on riscv32: narrow the indices to 32 bits (vnsrl.wi with
; shift 0 at e32/m4), shift left by 3, then vsoxei32.v. On riscv64: shift left
; by 3 in place, then vsoxei64.v. The store runs masked at e64/m8.
define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter overload: <vscale x 8 x double> values, per-lane pointers, mask, EVL.
declare void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

; Masked vp.scatter of nxv8f64 to a vector of pointers at e64/m8. The checks
; expect the pointer vector (v16 on both targets) as the index operand against
; a zero base: vsoxei32.v on riscv32, vsoxei64.v on riscv64.
define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f64 through a GEP on %base with raw i8 indices.
; Expected lowering: sign-extend the indices to pointer width (vsext.vf4 on
; riscv32, vsext.vf8 on riscv64), shift left by 3 for the 8-byte element
; size, then a masked indexed-ordered store at e64/m8.
define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f64 where the i8 indices are explicitly
; sign-extended to i64 in IR before the GEP. The checks expect riscv32 to
; extend only to 32 bits (vsext.vf4, vsoxei32.v) and riscv64 to 64 bits
; (vsext.vf8, vsoxei64.v); both shift left by 3 and store masked at e64/m8.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f64 where the i8 indices are zero-extended to i64
; in IR before the GEP. On both targets the checks expect the indices to be
; widened only to 16 bits (vzext.vf2 at e16/m2), shifted left by 3, and used
; with vsoxei16.v for the masked store at e64/m8.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV32-NEXT: vzext.vf2 v18, v16
; RV32-NEXT: vsll.vi v16, v18, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e16, m2, ta, ma
; RV64-NEXT: vzext.vf2 v18, v16
; RV64-NEXT: vsll.vi v16, v18, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Masked vp.scatter of nxv8f64 through a GEP on %base with raw i16 indices.
; Expected lowering: sign-extend the indices to pointer width (vsext.vf2 on
; riscv32, vsext.vf4 on riscv64), shift left by 3 for the 8-byte element
; size, then a masked indexed-ordered store at e64/m8.
define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Scatter of <vscale x 8 x double> through a GEP whose i16 indices are
; explicitly sign-extended to i64 in the IR. Expected output: the RV32 run
; only widens to 32 bits (vsext.vf2 at e32/m4) and stores with vsoxei32.v;
; the RV64 run widens to 64 bits (vsext.vf4 at e64/m8) and stores with
; vsoxei64.v. Both shift the index left by 3 before the store.
define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 8 x double> through a GEP whose i16 indices are
; zero-extended to i64 in the IR. The expected output is the same for the
; RV32 and RV64 runs: the indices are widened only to 32 bits (vzext.vf2 at
; e32/m4), shifted left by 3, and used by a vsoxei32.v store under mask v0.t.
define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v20, v16
|
|
; RV32-NEXT: vsll.vi v16, v20, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v20, v16
|
|
; RV64-NEXT: vsll.vi v16, v20, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 8 x double> through a GEP indexed directly by
; <vscale x 8 x i32>. Expected output: the RV32 run needs no extension, it
; shifts the indices left by 3 in place at e32/m4 and stores with vsoxei32.v;
; the RV64 run sign-extends to 64 bits (vsext.vf2 at e64/m8), shifts, and
; stores with vsoxei64.v.
define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v16, v16, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf2 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
|
|
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 8 x double> through a GEP whose i32 indices are
; explicitly sign-extended to i64 in the IR. Expected output: the RV32 run
; emits no extension at all (vsll.vi by 3 at e32/m4, then vsoxei32.v); the
; RV64 run sign-extends with vsext.vf2 at e64/m8, shifts by 3, and stores with
; vsoxei64.v.
define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v16, v16, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf2 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 8 x double> through a GEP whose i32 indices are
; zero-extended to i64 in the IR. Expected output: the RV32 run emits no
; extension (vsll.vi by 3 at e32/m4, then vsoxei32.v); the RV64 run widens to
; 64 bits with vzext.vf2 at e64/m8, shifts by 3, and stores with vsoxei64.v.
define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vsll.vi v16, v16, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v24, v16
|
|
; RV64-NEXT: vsll.vi v16, v24, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 8 x double> through a GEP indexed by
; <vscale x 8 x i64>. Expected output: the RV32 run narrows the indices to
; 32 bits (vnsrl.wi by 0 at e32/m4), shifts by 3, and stores with vsoxei32.v;
; the RV64 run shifts the 64-bit indices by 3 in place and stores with
; vsoxei64.v.
define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv8f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
|
|
; RV32-NEXT: vnsrl.wi v24, v16, 0
|
|
; RV32-NEXT: vsll.vi v16, v24, 3
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv8f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsll.vi v16, v16, 3
|
|
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
|
|
call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; VP scatter intrinsic for <vscale x 16 x double> data. Operands, in order:
; the data vector, a <vscale x 16 x ptr> vector of destinations, a
; <vscale x 16 x i1> mask, and an i32 (the tests below pass %evl here).
declare void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, <vscale x 16 x i1>, i32)
|
|
|
|
; Scatter of <vscale x 16 x double> through an explicit pointer vector. The
; expected output emits two masked indexed stores:
;   - the first stores v8 with VL = min(AVL, vlenb) (the bltu/mv sequence);
;   - the second stores v16 with VL = AVL - vlenb clamped to zero (the
;     sub/sltu/addi -1/and sequence), after sliding the mask in v0 down by
;     vlenb / 8 (srli by 3, then vslidedown.vx at e8/mf4).
; The pointer vector is loaded from the memory addressed by a0 (vl8re32.v in
; the RV32 run, two vl8re64.v loads in the RV64 run). The RV64 run also
; reserves 16 + 8 * vlenb bytes of stack to spill and reload one m8 register
; group; the RV32 run uses no stack.
define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_nxv16f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vl8re32.v v24, (a0)
|
|
; RV32-NEXT: csrr a0, vlenb
|
|
; RV32-NEXT: mv a2, a1
|
|
; RV32-NEXT: bltu a1, a0, .LBB108_2
|
|
; RV32-NEXT: # %bb.1:
|
|
; RV32-NEXT: mv a2, a0
|
|
; RV32-NEXT: .LBB108_2:
|
|
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
|
|
; RV32-NEXT: sub a2, a1, a0
|
|
; RV32-NEXT: sltu a1, a1, a2
|
|
; RV32-NEXT: addi a1, a1, -1
|
|
; RV32-NEXT: srli a0, a0, 3
|
|
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vslidedown.vx v0, v0, a0
|
|
; RV32-NEXT: and a1, a1, a2
|
|
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_nxv16f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: addi sp, sp, -16
|
|
; RV64-NEXT: .cfi_def_cfa_offset 16
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: slli a1, a1, 3
|
|
; RV64-NEXT: sub sp, sp, a1
|
|
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: slli a3, a1, 3
|
|
; RV64-NEXT: add a3, a0, a3
|
|
; RV64-NEXT: vl8re64.v v24, (a3)
|
|
; RV64-NEXT: addi a3, sp, 16
|
|
; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
|
|
; RV64-NEXT: vl8re64.v v24, (a0)
|
|
; RV64-NEXT: mv a0, a2
|
|
; RV64-NEXT: bltu a2, a1, .LBB108_2
|
|
; RV64-NEXT: # %bb.1:
|
|
; RV64-NEXT: mv a0, a1
|
|
; RV64-NEXT: .LBB108_2:
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
|
|
; RV64-NEXT: sub a0, a2, a1
|
|
; RV64-NEXT: sltu a2, a2, a0
|
|
; RV64-NEXT: addi a2, a2, -1
|
|
; RV64-NEXT: srli a1, a1, 3
|
|
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV64-NEXT: and a0, a2, a0
|
|
; RV64-NEXT: addi a1, sp, 16
|
|
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
|
|
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
|
|
; RV64-NEXT: csrr a0, vlenb
|
|
; RV64-NEXT: slli a0, a0, 3
|
|
; RV64-NEXT: add sp, sp, a0
|
|
; RV64-NEXT: addi sp, sp, 16
|
|
; RV64-NEXT: ret
|
|
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 16 x double> through a GEP indexed directly by
; <vscale x 16 x i16>. The index vector is loaded from the memory addressed by
; a1 (vl4re16.v) and the expected output emits two masked indexed stores, the
; second with the mask slid down by vlenb / 8 and VL = AVL - vlenb clamped to
; zero.
;   - RV32 run: one vsext.vf2 at e32/m8 produces all indices in v24-v31; the
;     two vsoxei32.v stores use v24 and v28 as index operands. No stack use.
;   - RV64 run: each half of the indices is widened separately with
;     vsext.vf4 at e64/m8, and 16 + 16 * vlenb bytes of stack are reserved to
;     spill and reload two m8 groups around the two vsoxei64.v stores.
define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vl4re16.v v4, (a1)
|
|
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v24, v4
|
|
; RV32-NEXT: csrr a1, vlenb
|
|
; RV32-NEXT: vsll.vi v24, v24, 3
|
|
; RV32-NEXT: mv a3, a2
|
|
; RV32-NEXT: bltu a2, a1, .LBB109_2
|
|
; RV32-NEXT: # %bb.1:
|
|
; RV32-NEXT: mv a3, a1
|
|
; RV32-NEXT: .LBB109_2:
|
|
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
|
|
; RV32-NEXT: sub a3, a2, a1
|
|
; RV32-NEXT: sltu a2, a2, a3
|
|
; RV32-NEXT: addi a2, a2, -1
|
|
; RV32-NEXT: srli a1, a1, 3
|
|
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV32-NEXT: and a2, a2, a3
|
|
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: addi sp, sp, -16
|
|
; RV64-NEXT: .cfi_def_cfa_offset 16
|
|
; RV64-NEXT: csrr a3, vlenb
|
|
; RV64-NEXT: slli a3, a3, 4
|
|
; RV64-NEXT: sub sp, sp, a3
|
|
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
|
|
; RV64-NEXT: vl4re16.v v24, (a1)
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: slli a1, a1, 3
|
|
; RV64-NEXT: add a1, sp, a1
|
|
; RV64-NEXT: addi a1, a1, 16
|
|
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
|
|
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v16, v26
|
|
; RV64-NEXT: vsll.vi v16, v16, 3
|
|
; RV64-NEXT: addi a1, sp, 16
|
|
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
|
|
; RV64-NEXT: vsext.vf4 v16, v24
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: vsll.vi v24, v16, 3
|
|
; RV64-NEXT: mv a3, a2
|
|
; RV64-NEXT: bltu a2, a1, .LBB109_2
|
|
; RV64-NEXT: # %bb.1:
|
|
; RV64-NEXT: mv a3, a1
|
|
; RV64-NEXT: .LBB109_2:
|
|
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
|
|
; RV64-NEXT: sub a3, a2, a1
|
|
; RV64-NEXT: sltu a2, a2, a3
|
|
; RV64-NEXT: addi a2, a2, -1
|
|
; RV64-NEXT: srli a1, a1, 3
|
|
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV64-NEXT: and a2, a2, a3
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: slli a1, a1, 3
|
|
; RV64-NEXT: add a1, sp, a1
|
|
; RV64-NEXT: addi a1, a1, 16
|
|
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
|
|
; RV64-NEXT: addi a1, sp, 16
|
|
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
|
|
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: csrr a0, vlenb
|
|
; RV64-NEXT: slli a0, a0, 4
|
|
; RV64-NEXT: add sp, sp, a0
|
|
; RV64-NEXT: addi sp, sp, 16
|
|
; RV64-NEXT: ret
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
|
|
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 16 x double> through a GEP whose i16 indices are
; explicitly sign-extended to i64 in the IR. The index vector is loaded from
; the memory addressed by a1 (vl4re16.v) and the expected output emits two
; masked indexed stores, the second with the mask slid down by vlenb / 8 and
; VL = AVL - vlenb clamped to zero.
;   - RV32 run: one vsext.vf2 at e32/m8, then two vsoxei32.v stores using v24
;     and v28 as index operands. No stack use.
;   - RV64 run: vsext.vf4 at e64/m8 for each half of the indices, two
;     vsoxei64.v stores, and 16 + 9 * vlenb bytes of stack: one vlenb-sized
;     slot for the mask register v0 (vs1r.v / vl1r.v) and an 8 * vlenb slot
;     for the v16 data group (vs8r.v / vl8r.v).
define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vl4re16.v v4, (a1)
|
|
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
|
|
; RV32-NEXT: vsext.vf2 v24, v4
|
|
; RV32-NEXT: csrr a1, vlenb
|
|
; RV32-NEXT: vsll.vi v24, v24, 3
|
|
; RV32-NEXT: mv a3, a2
|
|
; RV32-NEXT: bltu a2, a1, .LBB110_2
|
|
; RV32-NEXT: # %bb.1:
|
|
; RV32-NEXT: mv a3, a1
|
|
; RV32-NEXT: .LBB110_2:
|
|
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
|
|
; RV32-NEXT: sub a3, a2, a1
|
|
; RV32-NEXT: sltu a2, a2, a3
|
|
; RV32-NEXT: addi a2, a2, -1
|
|
; RV32-NEXT: srli a1, a1, 3
|
|
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV32-NEXT: and a2, a2, a3
|
|
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: addi sp, sp, -16
|
|
; RV64-NEXT: .cfi_def_cfa_offset 16
|
|
; RV64-NEXT: csrr a3, vlenb
|
|
; RV64-NEXT: slli a4, a3, 3
|
|
; RV64-NEXT: add a3, a4, a3
|
|
; RV64-NEXT: sub sp, sp, a3
|
|
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
|
|
; RV64-NEXT: vl4re16.v v24, (a1)
|
|
; RV64-NEXT: addi a1, sp, 16
|
|
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: add a1, sp, a1
|
|
; RV64-NEXT: addi a1, a1, 16
|
|
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
|
|
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
|
|
; RV64-NEXT: vsext.vf4 v0, v24
|
|
; RV64-NEXT: vsext.vf4 v16, v26
|
|
; RV64-NEXT: vsll.vi v16, v16, 3
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: vsll.vi v24, v0, 3
|
|
; RV64-NEXT: mv a3, a2
|
|
; RV64-NEXT: bltu a2, a1, .LBB110_2
|
|
; RV64-NEXT: # %bb.1:
|
|
; RV64-NEXT: mv a3, a1
|
|
; RV64-NEXT: .LBB110_2:
|
|
; RV64-NEXT: addi a4, sp, 16
|
|
; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
|
|
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
|
|
; RV64-NEXT: sub a3, a2, a1
|
|
; RV64-NEXT: sltu a2, a2, a3
|
|
; RV64-NEXT: addi a2, a2, -1
|
|
; RV64-NEXT: srli a1, a1, 3
|
|
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV64-NEXT: and a2, a2, a3
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: add a1, sp, a1
|
|
; RV64-NEXT: addi a1, a1, 16
|
|
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
|
|
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
|
|
; RV64-NEXT: csrr a0, vlenb
|
|
; RV64-NEXT: slli a1, a0, 3
|
|
; RV64-NEXT: add a0, a1, a0
|
|
; RV64-NEXT: add sp, sp, a0
|
|
; RV64-NEXT: addi sp, sp, 16
|
|
; RV64-NEXT: ret
|
|
%eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|
|
|
|
; Scatter of <vscale x 16 x double> through a GEP whose i16 indices are
; zero-extended to i64 in the IR. The expected output is the same for the
; RV32 and RV64 runs: the indices are loaded from the memory addressed by a1
; (vl4re16.v), widened only to 32 bits with a single vzext.vf2 at e32/m8, and
; shifted left by 3. Two vsoxei32.v stores follow, using v24 and v28 as index
; operands; the second has the mask slid down by vlenb / 8 and
; VL = AVL - vlenb clamped to zero. Neither run touches the stack.
define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
|
|
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
|
|
; RV32: # %bb.0:
|
|
; RV32-NEXT: vl4re16.v v4, (a1)
|
|
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
|
|
; RV32-NEXT: vzext.vf2 v24, v4
|
|
; RV32-NEXT: csrr a1, vlenb
|
|
; RV32-NEXT: vsll.vi v24, v24, 3
|
|
; RV32-NEXT: mv a3, a2
|
|
; RV32-NEXT: bltu a2, a1, .LBB111_2
|
|
; RV32-NEXT: # %bb.1:
|
|
; RV32-NEXT: mv a3, a1
|
|
; RV32-NEXT: .LBB111_2:
|
|
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
|
|
; RV32-NEXT: sub a3, a2, a1
|
|
; RV32-NEXT: sltu a2, a2, a3
|
|
; RV32-NEXT: addi a2, a2, -1
|
|
; RV32-NEXT: srli a1, a1, 3
|
|
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
|
|
; RV32-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV32-NEXT: and a2, a2, a3
|
|
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
|
|
; RV64: # %bb.0:
|
|
; RV64-NEXT: vl4re16.v v4, (a1)
|
|
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
|
|
; RV64-NEXT: vzext.vf2 v24, v4
|
|
; RV64-NEXT: csrr a1, vlenb
|
|
; RV64-NEXT: vsll.vi v24, v24, 3
|
|
; RV64-NEXT: mv a3, a2
|
|
; RV64-NEXT: bltu a2, a1, .LBB111_2
|
|
; RV64-NEXT: # %bb.1:
|
|
; RV64-NEXT: mv a3, a1
|
|
; RV64-NEXT: .LBB111_2:
|
|
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
|
|
; RV64-NEXT: sub a3, a2, a1
|
|
; RV64-NEXT: sltu a2, a2, a3
|
|
; RV64-NEXT: addi a2, a2, -1
|
|
; RV64-NEXT: srli a1, a1, 3
|
|
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
|
|
; RV64-NEXT: vslidedown.vx v0, v0, a1
|
|
; RV64-NEXT: and a2, a2, a3
|
|
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
|
|
; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
|
|
; RV64-NEXT: ret
|
|
%eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
|
|
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
|
|
call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
|
|
ret void
|
|
}
|