diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index 29038df652e7..d7ccfce0fbaa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -308,3 +308,244 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) {
   %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32>
   ret <32 x i32> %2
 }
+
+; TODO: This case should be a simple vnsrl, but gets scalarized instead
+define <32 x i8> @vnsrl_v32i8_v64i8(<64 x i8> %in) {
+; RV32-LABEL: vnsrl_v32i8_v64i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -128
+; RV32-NEXT:    .cfi_def_cfa_offset 128
+; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 128
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    li a0, 64
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV32-NEXT:    vse8.v v8, (a1)
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 1
+; RV32-NEXT:    vmv.x.s a0, v10
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 3
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 5
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 7
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 9
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 11
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 13
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 15
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 17
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 19
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 21
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 23
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 25
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 27
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v12, v8, 29
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslide1down.vx v10, v10, a0
+; RV32-NEXT:    vslidedown.vi v8, v8, 31
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vslide1down.vx v8, v10, a0
+; RV32-NEXT:    lbu a0, 33(sp)
+; RV32-NEXT:    lbu a1, 35(sp)
+; RV32-NEXT:    lbu a2, 37(sp)
+; RV32-NEXT:    lbu a3, 39(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    lbu a0, 41(sp)
+; RV32-NEXT:    lbu a1, 43(sp)
+; RV32-NEXT:    lbu a2, 45(sp)
+; RV32-NEXT:    lbu a3, 47(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    lbu a0, 49(sp)
+; RV32-NEXT:    lbu a1, 51(sp)
+; RV32-NEXT:    lbu a2, 53(sp)
+; RV32-NEXT:    lbu a3, 55(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    lbu a0, 57(sp)
+; RV32-NEXT:    lbu a1, 59(sp)
+; RV32-NEXT:    lbu a2, 61(sp)
+; RV32-NEXT:    lbu a3, 63(sp)
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a3
+; RV32-NEXT:    addi sp, s0, -128
+; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 128
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vnsrl_v32i8_v64i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -128
+; RV64-NEXT:    .cfi_def_cfa_offset 128
+; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 128
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    li a0, 64
+; RV64-NEXT:    mv a1, sp
+; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; RV64-NEXT:    vse8.v v8, (a1)
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 1
+; RV64-NEXT:    vmv.x.s a0, v10
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 3
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 5
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 7
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 9
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 11
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 13
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 15
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 17
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 19
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 21
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 23
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 25
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 27
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v12, v8, 29
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslide1down.vx v10, v10, a0
+; RV64-NEXT:    vslidedown.vi v8, v8, 31
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    vslide1down.vx v8, v10, a0
+; RV64-NEXT:    lbu a0, 33(sp)
+; RV64-NEXT:    lbu a1, 35(sp)
+; RV64-NEXT:    lbu a2, 37(sp)
+; RV64-NEXT:    lbu a3, 39(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    lbu a0, 41(sp)
+; RV64-NEXT:    lbu a1, 43(sp)
+; RV64-NEXT:    lbu a2, 45(sp)
+; RV64-NEXT:    lbu a3, 47(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    lbu a0, 49(sp)
+; RV64-NEXT:    lbu a1, 51(sp)
+; RV64-NEXT:    lbu a2, 53(sp)
+; RV64-NEXT:    lbu a3, 55(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    lbu a0, 57(sp)
+; RV64-NEXT:    lbu a1, 59(sp)
+; RV64-NEXT:    lbu a2, 61(sp)
+; RV64-NEXT:    lbu a3, 63(sp)
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a3
+; RV64-NEXT:    addi sp, s0, -128
+; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 128
+; RV64-NEXT:    ret
+  %res = shufflevector <64 x i8> %in, <64 x i8> poison, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
+  ret <32 x i8> %res
+}
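
For reference, a minimal hand-written sketch of the lowering the TODO is asking for, not tool-generated output: the shuffle keeps the odd (high) byte of each 16-bit pair, so a single narrowing shift suffices. The exact VL setup and register choices below are assumptions.

    li a0, 32                          # 32 result elements
    vsetvli zero, a0, e8, m2, ta, ma   # destination SEW=8; vnsrl reads the source as 2*SEW=16
    vnsrl.wi v8, v8, 8                 # narrowing shift right by 8 keeps the odd bytes
    ret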