; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+zbb -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s

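; This file checks that a scalar binary op applied to the result of a vector
; reduction can be folded into the reduction itself.
; Here the add of %x is folded in by making %x the start value of vredsum.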
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

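; A constant operand folds the same way: the 8 is materialized into a vector
; register and used as the start value of the reduction.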
define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

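; The AND is not folded into the reduction; it stays as a scalar instruction
; after the reduction result is moved to a GPR.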
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

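; With a small immediate, the scalar op is selected as andi.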
define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

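; Like AND, the OR is kept as a scalar op after the reduction.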
define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    or a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

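; Immediate form: ori on the scalar result.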
define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ori a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

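; XOR folds into the reduction start value, like ADD.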
define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

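; Here the scalar op is an AND of the XOR reduction, so it stays scalar as
; andi after a zero-started vredxor.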
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

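; The scalar umax against %x uses the Zbb maxu instruction.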
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    maxu a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

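; The constant 8 is materialized with li, since Zbb maxu has no immediate
; form.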
define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    maxu a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

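; Unsigned min uses Zbb minu.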
define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    minu a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

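; As above, with a materialized constant operand.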
define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    minu a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

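; Signed max uses Zbb max.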
define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    max a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

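; Signed max against a materialized constant.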
define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    max a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

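; Signed min uses Zbb min.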
define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    min a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

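; Signed min against a materialized constant.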
define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    min a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

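; With the fast flag, %x is used directly as the start value of vfredusum.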
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

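; Starting the intrinsic at 0.0 and adding %x afterwards folds to the same
; code as reduce_fadd.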
define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

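; %rdx has a second use (the store), so %x cannot be folded in as the start
; value; the reduction keeps a zero start and the add stays a scalar fadd.s.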
define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
; CHECK-LABEL: reduce_fadd3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  store float %rdx, ptr %rdxptr
  ret float %res
}

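; Two independent reductions, each folding its own scalar addend as a start
; value.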
define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
; CHECK-LABEL: reduce_fadd4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    vfmv.s.f v8, fa1
; CHECK-NEXT:    vfredusum.vs v8, v9, v8
; CHECK-NEXT:    vfmv.f.s fa4, v8
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %rdx2 = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %w)
  %res = fadd fast float %rdx, %x
  %res2 = fadd fast float %rdx2, %y
  %div = fdiv fast float %res, %res2
  ret float %div
}

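; fmax reduction; the combine with %x stays a scalar fmax.s.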
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmax.s fa0, fa0, fa5
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

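; fmin reduction; the combine with %x stays a scalar fmin.s.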
define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmin.s fa0, fa0, fa5
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)

; If the element type of the vector we're extracting from doesn't match the
; type we're inserting into, we can't directly insert or extract the
; subvector.
define void @crash(<2 x i32> %0) {
; CHECK-LABEL: crash:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    sb a0, 0(zero)
; CHECK-NEXT:    ret
entry:
  %1 = extractelement <2 x i32> %0, i64 0
  %2 = tail call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> zeroinitializer)
  %3 = zext i16 %2 to i32
  %op.rdx = add i32 %1, %3
  %conv18.us = trunc i32 %op.rdx to i8
  store i8 %conv18.us, ptr null, align 1
  ret void
}

declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)