This patch gets rid of the implicit vl/vtype def-use chain created by vsetvl and improves register allocation quality by moving the vsetvl insertion pass after RVV register allocation. This enables the following optimizations:

1. Unblock the scheduler's constraints by removing the vl/vtype def-use chain.
2. Support RVV re-materialization.
3. Support partial spill.

This patch adds a new option `-riscv-vsetvl-after-rvv-regalloc=<1|0>` to control the feature; it is disabled by default.
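For illustration only, a minimal sketch of how the new flag could be exercised against a test like the one below (this RUN line is not part of the test; the flag name and values are taken from the description above, and the rest of the command mirrors the existing RUN lines):

```llvm
; Illustrative sketch, not an actual RUN line of this test:
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -riscv-vsetvl-after-rvv-regalloc=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
```
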
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%e0 = extractelement <2 x i8> %v, i32 0
%e1 = extractelement <2 x i8> %v, i32 1
%add0 = xor i8 %e0, %e1
ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a2, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: ret
%e0 = extractelement <4 x i8> %v, i32 0
%e1 = extractelement <4 x i8> %v, i32 1
%e2 = extractelement <4 x i8> %v, i32 2
%e3 = extractelement <4 x i8> %v, i32 3
%add0 = xor i8 %e0, %e1
%add1 = add i8 %add0, %e2
%add2 = add i8 %add1, %e3
ret i8 %add2
}

define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 4
; CHECK-NEXT: vmv.x.s a2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 5
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 6
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 7
; CHECK-NEXT: vmv.x.s a5, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: ret
%e0 = extractelement <8 x i8> %v, i32 0
%e1 = extractelement <8 x i8> %v, i32 1
%e2 = extractelement <8 x i8> %v, i32 2
%e3 = extractelement <8 x i8> %v, i32 3
%e4 = extractelement <8 x i8> %v, i32 4
%e5 = extractelement <8 x i8> %v, i32 5
%e6 = extractelement <8 x i8> %v, i32 6
%e7 = extractelement <8 x i8> %v, i32 7
%add0 = xor i8 %e0, %e1
%add1 = add i8 %add0, %e2
%add2 = add i8 %add1, %e3
%add3 = add i8 %add2, %e4
%add4 = add i8 %add3, %e5
%add5 = add i8 %add4, %e6
%add6 = add i8 %add5, %e7
ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; CHECK-LABEL: explode_16xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 4
; CHECK-NEXT: vmv.x.s a2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 5
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 6
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 7
; CHECK-NEXT: vmv.x.s a5, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 8
; CHECK-NEXT: vmv.x.s a6, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 9
; CHECK-NEXT: vmv.x.s a7, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 10
; CHECK-NEXT: vmv.x.s t0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 11
; CHECK-NEXT: vmv.x.s t1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 12
; CHECK-NEXT: vmv.x.s t2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 13
; CHECK-NEXT: vmv.x.s t3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 14
; CHECK-NEXT: vmv.x.s t4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 15
; CHECK-NEXT: vmv.x.s t5, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s t6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, t6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, a5, a7
; CHECK-NEXT: add a5, a5, t0
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: add t1, t1, t2
; CHECK-NEXT: add t1, t1, t3
; CHECK-NEXT: add t1, t1, t4
; CHECK-NEXT: add t1, t1, t5
; CHECK-NEXT: add a0, a0, t1
; CHECK-NEXT: ret
%e0 = extractelement <16 x i8> %v, i32 0
%e1 = extractelement <16 x i8> %v, i32 1
%e2 = extractelement <16 x i8> %v, i32 2
%e3 = extractelement <16 x i8> %v, i32 3
%e4 = extractelement <16 x i8> %v, i32 4
%e5 = extractelement <16 x i8> %v, i32 5
%e6 = extractelement <16 x i8> %v, i32 6
%e7 = extractelement <16 x i8> %v, i32 7
%e8 = extractelement <16 x i8> %v, i32 8
%e9 = extractelement <16 x i8> %v, i32 9
%e10 = extractelement <16 x i8> %v, i32 10
%e11 = extractelement <16 x i8> %v, i32 11
%e12 = extractelement <16 x i8> %v, i32 12
%e13 = extractelement <16 x i8> %v, i32 13
%e14 = extractelement <16 x i8> %v, i32 14
%e15 = extractelement <16 x i8> %v, i32 15
%add0 = xor i8 %e0, %e1
%add1 = add i8 %add0, %e2
%add2 = add i8 %add1, %e3
%add3 = add i8 %add2, %e4
%add4 = add i8 %add3, %e5
%add5 = add i8 %add4, %e6
%add6 = add i8 %add5, %e7
%add7 = add i8 %add6, %e8
%add8 = add i8 %add7, %e9
%add9 = add i8 %add8, %e10
%add10 = add i8 %add9, %e11
%add11 = add i8 %add10, %e12
%add12 = add i8 %add11, %e13
%add13 = add i8 %add12, %e14
%add14 = add i8 %add13, %e15
ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%e0 = extractelement <2 x i16> %v, i32 0
%e1 = extractelement <2 x i16> %v, i32 1
%add0 = xor i16 %e0, %e1
ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a2, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: ret
%e0 = extractelement <4 x i16> %v, i32 0
%e1 = extractelement <4 x i16> %v, i32 1
%e2 = extractelement <4 x i16> %v, i32 2
%e3 = extractelement <4 x i16> %v, i32 3
%add0 = xor i16 %e0, %e1
%add1 = add i16 %add0, %e2
%add2 = add i16 %add1, %e3
ret i16 %add2
}

define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 3
; CHECK-NEXT: vmv.x.s a1, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 4
; CHECK-NEXT: vmv.x.s a2, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 5
; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 6
; CHECK-NEXT: vmv.x.s a4, v9
; CHECK-NEXT: vslidedown.vi v9, v8, 7
; CHECK-NEXT: vmv.x.s a5, v9
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: ret
%e0 = extractelement <8 x i16> %v, i32 0
%e1 = extractelement <8 x i16> %v, i32 1
%e2 = extractelement <8 x i16> %v, i32 2
%e3 = extractelement <8 x i16> %v, i32 3
%e4 = extractelement <8 x i16> %v, i32 4
%e5 = extractelement <8 x i16> %v, i32 5
%e6 = extractelement <8 x i16> %v, i32 6
%e7 = extractelement <8 x i16> %v, i32 7
%add0 = xor i16 %e0, %e1
%add1 = add i16 %add0, %e2
%add2 = add i16 %add1, %e3
%add3 = add i16 %add2, %e4
%add4 = add i16 %add3, %e5
%add5 = add i16 %add4, %e6
%add6 = add i16 %add5, %e7
ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; CHECK-LABEL: explode_16xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 2
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 3
; CHECK-NEXT: vmv.x.s a1, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 4
; CHECK-NEXT: vmv.x.s a2, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 5
; CHECK-NEXT: vmv.x.s a3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 6
; CHECK-NEXT: vmv.x.s a4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 7
; CHECK-NEXT: vmv.x.s a5, v10
; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 8
; CHECK-NEXT: vmv.x.s a6, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 9
; CHECK-NEXT: vmv.x.s a7, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 10
; CHECK-NEXT: vmv.x.s t0, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 11
; CHECK-NEXT: vmv.x.s t1, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 12
; CHECK-NEXT: vmv.x.s t2, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 13
; CHECK-NEXT: vmv.x.s t3, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 14
; CHECK-NEXT: vmv.x.s t4, v10
; CHECK-NEXT: vslidedown.vi v10, v8, 15
; CHECK-NEXT: vmv.x.s t5, v10
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s t6, v8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, t6, a0
; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, a2, a4
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, a5, a7
; CHECK-NEXT: add a5, a5, t0
; CHECK-NEXT: add a0, a0, a5
; CHECK-NEXT: add t1, t1, t2
; CHECK-NEXT: add t1, t1, t3
; CHECK-NEXT: add t1, t1, t4
; CHECK-NEXT: add t1, t1, t5
; CHECK-NEXT: add a0, a0, t1
; CHECK-NEXT: ret
%e0 = extractelement <16 x i16> %v, i32 0
%e1 = extractelement <16 x i16> %v, i32 1
%e2 = extractelement <16 x i16> %v, i32 2
%e3 = extractelement <16 x i16> %v, i32 3
%e4 = extractelement <16 x i16> %v, i32 4
%e5 = extractelement <16 x i16> %v, i32 5
%e6 = extractelement <16 x i16> %v, i32 6
%e7 = extractelement <16 x i16> %v, i32 7
%e8 = extractelement <16 x i16> %v, i32 8
%e9 = extractelement <16 x i16> %v, i32 9
%e10 = extractelement <16 x i16> %v, i32 10
%e11 = extractelement <16 x i16> %v, i32 11
%e12 = extractelement <16 x i16> %v, i32 12
%e13 = extractelement <16 x i16> %v, i32 13
%e14 = extractelement <16 x i16> %v, i32 14
%e15 = extractelement <16 x i16> %v, i32 15
%add0 = xor i16 %e0, %e1
%add1 = add i16 %add0, %e2
%add2 = add i16 %add1, %e3
%add3 = add i16 %add2, %e4
%add4 = add i16 %add3, %e5
%add5 = add i16 %add4, %e6
%add6 = add i16 %add5, %e7
%add7 = add i16 %add6, %e8
%add8 = add i16 %add7, %e9
%add9 = add i16 %add8, %e10
%add10 = add i16 %add9, %e11
%add11 = add i16 %add10, %e12
%add12 = add i16 %add11, %e13
%add13 = add i16 %add12, %e14
%add14 = add i16 %add13, %e15
ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vredxor.vs v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%e0 = extractelement <2 x i32> %v, i32 0
%e1 = extractelement <2 x i32> %v, i32 1
%add0 = xor i32 %e0, %e1
ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 2
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vslidedown.vi v9, v8, 3
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: ret
;
; RV64-LABEL: explode_4xi32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 2
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: vslidedown.vi v9, v8, 3
; RV64-NEXT: vmv.x.s a1, v9
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a2, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: addw a0, a2, a0
; RV64-NEXT: ret
%e0 = extractelement <4 x i32> %v, i32 0
%e1 = extractelement <4 x i32> %v, i32 1
%e2 = extractelement <4 x i32> %v, i32 2
%e3 = extractelement <4 x i32> %v, i32 3
%add0 = xor i32 %e0, %e1
%add1 = add i32 %add0, %e2
%add2 = add i32 %add1, %e3
ret i32 %add2
}

define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vslidedown.vi v10, v8, 5
; RV32-NEXT: vmv.x.s a3, v10
; RV32-NEXT: vslidedown.vi v10, v8, 6
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: vslidedown.vi v10, v8, 7
; RV32-NEXT: vmv.x.s a5, v10
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a6, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a6, a0
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: ret
;
; RV64-LABEL: explode_8xi32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: vslidedown.vi v10, v8, 3
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 4
; RV64-NEXT: vmv.x.s a2, v10
; RV64-NEXT: vslidedown.vi v10, v8, 5
; RV64-NEXT: vmv.x.s a3, v10
; RV64-NEXT: vslidedown.vi v10, v8, 6
; RV64-NEXT: vmv.x.s a4, v10
; RV64-NEXT: vslidedown.vi v10, v8, 7
; RV64-NEXT: vmv.x.s a5, v10
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, a6, a0
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: addw a0, a0, a5
; RV64-NEXT: ret
%e0 = extractelement <8 x i32> %v, i32 0
%e1 = extractelement <8 x i32> %v, i32 1
%e2 = extractelement <8 x i32> %v, i32 2
%e3 = extractelement <8 x i32> %v, i32 3
%e4 = extractelement <8 x i32> %v, i32 4
%e5 = extractelement <8 x i32> %v, i32 5
%e6 = extractelement <8 x i32> %v, i32 6
%e7 = extractelement <8 x i32> %v, i32 7
%add0 = xor i32 %e0, %e1
%add1 = add i32 %add0, %e2
%add2 = add i32 %add1, %e3
%add3 = add i32 %add2, %e4
%add4 = add i32 %add3, %e5
%add5 = add i32 %add4, %e6
%add6 = add i32 %add5, %e7
ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -128
; RV32-NEXT: .cfi_def_cfa_offset 128
; RV32-NEXT: sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 128
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -64
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmv.x.s a0, v12
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vmv.x.s a1, v12
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vmv.x.s a2, v12
; RV32-NEXT: vslidedown.vi v12, v8, 5
; RV32-NEXT: vmv.x.s a3, v12
; RV32-NEXT: vslidedown.vi v12, v8, 6
; RV32-NEXT: vmv.x.s a4, v12
; RV32-NEXT: vslidedown.vi v12, v8, 7
; RV32-NEXT: vmv.x.s a5, v12
; RV32-NEXT: mv a6, sp
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v8, (a6)
; RV32-NEXT: lw a6, 32(sp)
; RV32-NEXT: lw a7, 36(sp)
; RV32-NEXT: lw t0, 40(sp)
; RV32-NEXT: lw t1, 44(sp)
; RV32-NEXT: lw t2, 48(sp)
; RV32-NEXT: lw t3, 52(sp)
; RV32-NEXT: lw t4, 56(sp)
; RV32-NEXT: lw t5, 60(sp)
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s t6, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, t6, a0
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a5, a5, a6
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, a7, t1
; RV32-NEXT: add a0, a0, a7
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, t2, t5
; RV32-NEXT: add a0, a0, t2
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi32:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -128
; RV64-NEXT: .cfi_def_cfa_offset 128
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmv.x.s a0, v12
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vmv.x.s a1, v12
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: vmv.x.s a2, v12
; RV64-NEXT: vslidedown.vi v12, v8, 5
; RV64-NEXT: vmv.x.s a3, v12
; RV64-NEXT: vslidedown.vi v12, v8, 6
; RV64-NEXT: vmv.x.s a4, v12
; RV64-NEXT: vslidedown.vi v12, v8, 7
; RV64-NEXT: vmv.x.s a5, v12
; RV64-NEXT: mv a6, sp
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vse32.v v8, (a6)
; RV64-NEXT: lw a6, 32(sp)
; RV64-NEXT: lw a7, 36(sp)
; RV64-NEXT: lw t0, 40(sp)
; RV64-NEXT: lw t1, 44(sp)
; RV64-NEXT: lw t2, 48(sp)
; RV64-NEXT: lw t3, 52(sp)
; RV64-NEXT: lw t4, 56(sp)
; RV64-NEXT: lw t5, 60(sp)
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s t6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, t6, a0
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: add a7, a7, t0
; RV64-NEXT: add a7, a7, t1
; RV64-NEXT: add a0, a0, a7
; RV64-NEXT: add t2, t2, t3
; RV64-NEXT: add t2, t2, t4
; RV64-NEXT: add t2, t2, t5
; RV64-NEXT: addw a0, a0, t2
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 128
; RV64-NEXT: ret
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%e5 = extractelement <16 x i32> %v, i32 5
%e6 = extractelement <16 x i32> %v, i32 6
%e7 = extractelement <16 x i32> %v, i32 7
%e8 = extractelement <16 x i32> %v, i32 8
%e9 = extractelement <16 x i32> %v, i32 9
%e10 = extractelement <16 x i32> %v, i32 10
%e11 = extractelement <16 x i32> %v, i32 11
%e12 = extractelement <16 x i32> %v, i32 12
%e13 = extractelement <16 x i32> %v, i32 13
%e14 = extractelement <16 x i32> %v, i32 14
%e15 = extractelement <16 x i32> %v, i32 15
%add0 = xor i32 %e0, %e1
%add1 = add i32 %add0, %e2
%add2 = add i32 %add1, %e3
%add3 = add i32 %add2, %e4
%add4 = add i32 %add3, %e5
%add5 = add i32 %add4, %e6
%add6 = add i32 %add5, %e7
%add7 = add i32 %add6, %e8
%add8 = add i32 %add7, %e9
%add9 = add i32 %add8, %e10
%add10 = add i32 %add9, %e11
%add11 = add i32 %add10, %e12
%add12 = add i32 %add11, %e13
%add13 = add i32 %add12, %e14
%add14 = add i32 %add13, %e15
ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
;
; RV64-LABEL: explode_2xi64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
%e0 = extractelement <2 x i64> %v, i32 0
%e1 = extractelement <2 x i64> %v, i32 1
%add0 = xor i64 %e0, %e1
ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsrl.vx v12, v10, a0
; RV32-NEXT: vmv.x.s a1, v12
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vsrl.vx v12, v10, a0
; RV32-NEXT: vmv.x.s a3, v12
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vmv.x.s a5, v8
; RV32-NEXT: add a2, a5, a2
; RV32-NEXT: sltu a5, a2, a5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add a1, a0, a3
; RV32-NEXT: add a0, a2, a4
; RV32-NEXT: sltu a2, a0, a2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: ret
;
; RV64-LABEL: explode_4xi64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: vslidedown.vi v10, v8, 3
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a2, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, a2, a0
; RV64-NEXT: ret
%e0 = extractelement <4 x i64> %v, i32 0
%e1 = extractelement <4 x i64> %v, i32 1
%e2 = extractelement <4 x i64> %v, i32 2
%e3 = extractelement <4 x i64> %v, i32 3
%add0 = xor i64 %e0, %e1
%add1 = add i64 %add0, %e2
%add2 = add i64 %add1, %e3
ret i64 %add2
}

define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a1, v16
; RV32-NEXT: vmv.x.s a2, v12
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a3, v16
; RV32-NEXT: vmv.x.s a4, v12
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a5, v16
; RV32-NEXT: vmv.x.s a6, v12
; RV32-NEXT: vslidedown.vi v12, v8, 5
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s a7, v16
; RV32-NEXT: vmv.x.s t0, v12
; RV32-NEXT: vslidedown.vi v12, v8, 6
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s t1, v16
; RV32-NEXT: vmv.x.s t2, v12
; RV32-NEXT: vslidedown.vi v12, v8, 7
; RV32-NEXT: vsrl.vx v16, v12, a0
; RV32-NEXT: vmv.x.s t3, v16
; RV32-NEXT: vmv.x.s t4, v12
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vmv.x.s t5, v8
; RV32-NEXT: add a2, t5, a2
; RV32-NEXT: sltu t5, a2, t5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, a0, t5
; RV32-NEXT: add a0, a0, a3
; RV32-NEXT: add a4, a2, a4
; RV32-NEXT: sltu a1, a4, a2
; RV32-NEXT: add a1, a1, a5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a6, a4, a6
; RV32-NEXT: sltu a1, a6, a4
; RV32-NEXT: add a1, a1, a7
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t0, a6, t0
; RV32-NEXT: sltu a1, t0, a6
; RV32-NEXT: add a1, a1, t1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t2, t0, t2
; RV32-NEXT: sltu a1, t2, t0
; RV32-NEXT: add a1, a1, t3
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: add a0, t2, t4
; RV32-NEXT: sltu a2, a0, t2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: ret
;
; RV64-LABEL: explode_8xi64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -128
; RV64-NEXT: .cfi_def_cfa_offset 128
; RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmv.x.s a0, v12
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vmv.x.s a1, v12
; RV64-NEXT: mv a2, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vse64.v v8, (a2)
; RV64-NEXT: ld a2, 32(sp)
; RV64-NEXT: ld a3, 40(sp)
; RV64-NEXT: ld a4, 48(sp)
; RV64-NEXT: ld a5, 56(sp)
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s a6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, a6, a0
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: addi sp, s0, -128
; RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 128
; RV64-NEXT: ret
%e0 = extractelement <8 x i64> %v, i32 0
%e1 = extractelement <8 x i64> %v, i32 1
%e2 = extractelement <8 x i64> %v, i32 2
%e3 = extractelement <8 x i64> %v, i32 3
%e4 = extractelement <8 x i64> %v, i32 4
%e5 = extractelement <8 x i64> %v, i32 5
%e6 = extractelement <8 x i64> %v, i32 6
%e7 = extractelement <8 x i64> %v, i32 7
%add0 = xor i64 %e0, %e1
%add1 = add i64 %add0, %e2
%add2 = add i64 %add1, %e3
%add3 = add i64 %add2, %e4
%add4 = add i64 %add3, %e5
%add5 = add i64 %add4, %e6
%add6 = add i64 %add5, %e7
ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: .cfi_offset s1, -8
; RV32-NEXT: .cfi_offset s2, -12
; RV32-NEXT: .cfi_offset s3, -16
; RV32-NEXT: .cfi_offset s4, -20
; RV32-NEXT: .cfi_offset s5, -24
; RV32-NEXT: .cfi_offset s6, -28
; RV32-NEXT: .cfi_offset s7, -32
; RV32-NEXT: .cfi_offset s8, -36
; RV32-NEXT: .cfi_offset s9, -40
; RV32-NEXT: .cfi_offset s10, -44
; RV32-NEXT: .cfi_offset s11, -48
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vslidedown.vi v16, v8, 2
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a1, v24
; RV32-NEXT: vmv.x.s a2, v16
; RV32-NEXT: vslidedown.vi v16, v8, 3
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a3, v24
; RV32-NEXT: vmv.x.s a4, v16
; RV32-NEXT: vslidedown.vi v16, v8, 4
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a5, v24
; RV32-NEXT: vmv.x.s a6, v16
; RV32-NEXT: vslidedown.vi v16, v8, 5
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s a7, v24
; RV32-NEXT: vmv.x.s t0, v16
; RV32-NEXT: vslidedown.vi v16, v8, 6
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s t1, v24
; RV32-NEXT: vmv.x.s t2, v16
; RV32-NEXT: vslidedown.vi v16, v8, 7
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s t3, v24
; RV32-NEXT: vmv.x.s t4, v16
; RV32-NEXT: vslidedown.vi v16, v8, 8
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s t5, v24
; RV32-NEXT: vmv.x.s t6, v16
; RV32-NEXT: vslidedown.vi v16, v8, 9
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s0, v24
; RV32-NEXT: vmv.x.s s1, v16
; RV32-NEXT: vslidedown.vi v16, v8, 10
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s2, v24
; RV32-NEXT: vmv.x.s s3, v16
; RV32-NEXT: vslidedown.vi v16, v8, 11
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s4, v24
; RV32-NEXT: vmv.x.s s5, v16
; RV32-NEXT: vslidedown.vi v16, v8, 12
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s6, v24
; RV32-NEXT: vmv.x.s s7, v16
; RV32-NEXT: vslidedown.vi v16, v8, 13
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.x.s s9, v24
; RV32-NEXT: vmv.x.s s8, v16
; RV32-NEXT: vslidedown.vi v16, v8, 14
; RV32-NEXT: vsrl.vx v24, v16, a0
; RV32-NEXT: vmv.s.x v17, zero
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vredxor.vs v17, v8, v17
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 15
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v18, v17, a0
; RV32-NEXT: vmv.x.s s10, v18
; RV32-NEXT: vmv.x.s s11, v17
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v0, v8, a0
; RV32-NEXT: add a1, s10, a1
; RV32-NEXT: add a2, s11, a2
; RV32-NEXT: sltu a0, a2, s11
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: add a0, a0, a3
; RV32-NEXT: add a4, a2, a4
; RV32-NEXT: sltu a1, a4, a2
; RV32-NEXT: add a1, a1, a5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a6, a4, a6
; RV32-NEXT: sltu a1, a6, a4
; RV32-NEXT: add a1, a1, a7
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t0, a6, t0
; RV32-NEXT: sltu a1, t0, a6
; RV32-NEXT: add a1, a1, t1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t2, t0, t2
; RV32-NEXT: sltu a1, t2, t0
; RV32-NEXT: add a1, a1, t3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t4, t2, t4
; RV32-NEXT: sltu a1, t4, t2
; RV32-NEXT: add a1, a1, t5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add t6, t4, t6
; RV32-NEXT: sltu a1, t6, t4
; RV32-NEXT: add a1, a1, s0
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s1, t6, s1
; RV32-NEXT: sltu a1, s1, t6
; RV32-NEXT: add a1, a1, s2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s3, s1, s3
; RV32-NEXT: sltu a1, s3, s1
; RV32-NEXT: add a1, a1, s4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s5, s3, s5
; RV32-NEXT: sltu a1, s5, s3
; RV32-NEXT: add a1, a1, s6
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add s7, s5, s7
; RV32-NEXT: sltu a1, s7, s5
; RV32-NEXT: add a1, a1, s9
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: vmv.x.s a1, v24
; RV32-NEXT: add s8, s7, s8
; RV32-NEXT: sltu a2, s8, s7
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: vmv.x.s a2, v16
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: vmv.x.s a1, v0
; RV32-NEXT: add a2, s8, a2
; RV32-NEXT: sltu a3, a2, s8
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a2, a0, a2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -256
; RV64-NEXT: .cfi_def_cfa_offset 256
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 2
; RV64-NEXT: vmv.x.s a0, v16
; RV64-NEXT: vslidedown.vi v16, v8, 3
; RV64-NEXT: vmv.x.s a1, v16
; RV64-NEXT: mv a2, sp
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vse64.v v8, (a2)
; RV64-NEXT: ld a2, 32(sp)
; RV64-NEXT: ld a3, 40(sp)
; RV64-NEXT: ld a4, 48(sp)
; RV64-NEXT: ld a5, 56(sp)
; RV64-NEXT: ld a6, 64(sp)
; RV64-NEXT: ld a7, 72(sp)
; RV64-NEXT: ld t0, 80(sp)
; RV64-NEXT: ld t1, 88(sp)
; RV64-NEXT: ld t2, 96(sp)
; RV64-NEXT: ld t3, 104(sp)
; RV64-NEXT: ld t4, 112(sp)
; RV64-NEXT: ld t5, 120(sp)
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s t6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, t6, a0
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: add a5, a5, a7
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: add t0, t0, t1
; RV64-NEXT: add t0, t0, t2
; RV64-NEXT: add t0, t0, t3
; RV64-NEXT: add a0, a0, t0
; RV64-NEXT: add t4, t4, t5
; RV64-NEXT: add a0, a0, t4
; RV64-NEXT: addi sp, s0, -256
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
%e0 = extractelement <16 x i64> %v, i32 0
%e1 = extractelement <16 x i64> %v, i32 1
%e2 = extractelement <16 x i64> %v, i32 2
%e3 = extractelement <16 x i64> %v, i32 3
%e4 = extractelement <16 x i64> %v, i32 4
%e5 = extractelement <16 x i64> %v, i32 5
%e6 = extractelement <16 x i64> %v, i32 6
%e7 = extractelement <16 x i64> %v, i32 7
%e8 = extractelement <16 x i64> %v, i32 8
%e9 = extractelement <16 x i64> %v, i32 9
%e10 = extractelement <16 x i64> %v, i32 10
%e11 = extractelement <16 x i64> %v, i32 11
%e12 = extractelement <16 x i64> %v, i32 12
%e13 = extractelement <16 x i64> %v, i32 13
%e14 = extractelement <16 x i64> %v, i32 14
%e15 = extractelement <16 x i64> %v, i32 15
%add0 = xor i64 %e0, %e1
%add1 = add i64 %add0, %e2
%add2 = add i64 %add1, %e3
%add3 = add i64 %add2, %e4
%add4 = add i64 %add3, %e5
%add5 = add i64 %add4, %e6
%add6 = add i64 %add5, %e7
%add7 = add i64 %add6, %e8
%add8 = add i64 %add7, %e9
%add9 = add i64 %add8, %e10
%add10 = add i64 %add9, %e11
%add11 = add i64 %add10, %e12
%add12 = add i64 %add11, %e13
%add13 = add i64 %add12, %e14
%add14 = add i64 %add13, %e15
ret i64 %add14
}

define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) {
; RV32-LABEL: explode_16xi32_exact_vlen:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmv.x.s a0, v12
; RV32-NEXT: vslidedown.vi v12, v8, 3
; RV32-NEXT: vmv.x.s a1, v12
; RV32-NEXT: vmv.x.s a2, v9
; RV32-NEXT: vslidedown.vi v12, v9, 1
; RV32-NEXT: vmv.x.s a3, v12
; RV32-NEXT: vslidedown.vi v12, v9, 2
; RV32-NEXT: vmv.x.s a4, v12
; RV32-NEXT: vslidedown.vi v9, v9, 3
; RV32-NEXT: vmv.x.s a5, v9
; RV32-NEXT: vmv.x.s a6, v10
; RV32-NEXT: vslidedown.vi v9, v10, 1
; RV32-NEXT: vmv.x.s a7, v9
; RV32-NEXT: vslidedown.vi v9, v10, 2
; RV32-NEXT: vmv.x.s t0, v9
; RV32-NEXT: vslidedown.vi v9, v10, 3
; RV32-NEXT: vmv.x.s t1, v9
; RV32-NEXT: vmv.x.s t2, v11
; RV32-NEXT: vslidedown.vi v9, v11, 1
; RV32-NEXT: vmv.x.s t3, v9
; RV32-NEXT: vslidedown.vi v9, v11, 2
; RV32-NEXT: vmv.x.s t4, v9
; RV32-NEXT: vslidedown.vi v9, v11, 3
; RV32-NEXT: vmv.x.s t5, v9
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vredxor.vs v8, v8, v9
; RV32-NEXT: vmv.x.s t6, v8
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, t6, a0
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a5, a5, a6
; RV32-NEXT: add a5, a5, a7
; RV32-NEXT: add a5, a5, t0
; RV32-NEXT: add a0, a0, a5
; RV32-NEXT: add t1, t1, t2
; RV32-NEXT: add t1, t1, t3
; RV32-NEXT: add t1, t1, t4
; RV32-NEXT: add t1, t1, t5
; RV32-NEXT: add a0, a0, t1
; RV32-NEXT: ret
;
; RV64-LABEL: explode_16xi32_exact_vlen:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmv.x.s a0, v12
; RV64-NEXT: vslidedown.vi v12, v8, 3
; RV64-NEXT: vmv.x.s a1, v12
; RV64-NEXT: vmv.x.s a2, v9
; RV64-NEXT: vslidedown.vi v12, v9, 1
; RV64-NEXT: vmv.x.s a3, v12
; RV64-NEXT: vslidedown.vi v12, v9, 2
; RV64-NEXT: vmv.x.s a4, v12
; RV64-NEXT: vslidedown.vi v9, v9, 3
; RV64-NEXT: vmv.x.s a5, v9
; RV64-NEXT: vmv.x.s a6, v10
; RV64-NEXT: vslidedown.vi v9, v10, 1
; RV64-NEXT: vmv.x.s a7, v9
; RV64-NEXT: vslidedown.vi v9, v10, 2
; RV64-NEXT: vmv.x.s t0, v9
; RV64-NEXT: vslidedown.vi v9, v10, 3
; RV64-NEXT: vmv.x.s t1, v9
; RV64-NEXT: vmv.x.s t2, v11
; RV64-NEXT: vslidedown.vi v9, v11, 1
; RV64-NEXT: vmv.x.s t3, v9
; RV64-NEXT: vslidedown.vi v9, v11, 2
; RV64-NEXT: vmv.x.s t4, v9
; RV64-NEXT: vslidedown.vi v9, v11, 3
; RV64-NEXT: vmv.x.s t5, v9
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vredxor.vs v8, v8, v9
; RV64-NEXT: vmv.x.s t6, v8
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: add a0, t6, a0
; RV64-NEXT: add a2, a2, a3
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a0, a0, a2
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: add a5, a5, a7
; RV64-NEXT: add a5, a5, t0
; RV64-NEXT: add a0, a0, a5
; RV64-NEXT: add t1, t1, t2
; RV64-NEXT: add t1, t1, t3
; RV64-NEXT: add t1, t1, t4
; RV64-NEXT: add t1, t1, t5
; RV64-NEXT: addw a0, a0, t1
; RV64-NEXT: ret
%e0 = extractelement <16 x i32> %v, i32 0
%e1 = extractelement <16 x i32> %v, i32 1
%e2 = extractelement <16 x i32> %v, i32 2
%e3 = extractelement <16 x i32> %v, i32 3
%e4 = extractelement <16 x i32> %v, i32 4
%e5 = extractelement <16 x i32> %v, i32 5
%e6 = extractelement <16 x i32> %v, i32 6
%e7 = extractelement <16 x i32> %v, i32 7
%e8 = extractelement <16 x i32> %v, i32 8
%e9 = extractelement <16 x i32> %v, i32 9
%e10 = extractelement <16 x i32> %v, i32 10
%e11 = extractelement <16 x i32> %v, i32 11
%e12 = extractelement <16 x i32> %v, i32 12
%e13 = extractelement <16 x i32> %v, i32 13
%e14 = extractelement <16 x i32> %v, i32 14
%e15 = extractelement <16 x i32> %v, i32 15
%add0 = xor i32 %e0, %e1
%add1 = add i32 %add0, %e2
%add2 = add i32 %add1, %e3
%add3 = add i32 %add2, %e4
%add4 = add i32 %add3, %e5
%add5 = add i32 %add4, %e6
%add6 = add i32 %add5, %e7
%add7 = add i32 %add6, %e8
%add8 = add i32 %add7, %e9
%add9 = add i32 %add8, %e10
%add10 = add i32 %add9, %e11
%add11 = add i32 %add10, %e12
%add12 = add i32 %add11, %e13
%add13 = add i32 %add12, %e14
%add14 = add i32 %add13, %e15
ret i32 %add14
}