Files
clang-p2996/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
Philip Reames 144b2f579e [RISCV] Start vslide1down sequence with a dependency breaking splat (#72691)
If we are using entirely vslide1downs to initialize an otherwise undef
vector, we end up with an implicit_def as the source of the first
vslide1down. This register has to be allocated, and creates false
dependencies with surrounding code.

Instead, start our sequence with a vmv.v.x in the hopes of creating a
dependency breaking idiom. Unfortunately, it's not clear this will
actually work: due to the VL=0 special case for T.A., the hardware has
to work pretty hard to recognize that the vmv.v.x actually has no source
dependence. I don't think we can reasonably expect all hardware to have
optimized this case, but I also don't see any downside in preferring it.
2023-11-17 12:02:58 -08:00

32 lines
1.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=-v -o - %s | FileCheck --check-prefix=CHECK-NOV %s
; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=+v -o - %s | FileCheck --check-prefix=CHECK-V %s
; Reproducer for https://github.com/llvm/llvm-project/issues/55168.
; We should always return 1 (and not -1).
; @test builds the vector <%call.i, INT32_MIN>, performs a signed less-than
; compare of both lanes against INT32_MIN + 2, zero-extends the i1 results and
; returns lane 1. Lane 1 never depends on %call.i: INT32_MIN < INT32_MIN + 2 is
; true and zext of true is 1, so the function is the constant 1 for any input.
define i32 @test(i32 %call.i) {
; CHECK-NOV-LABEL: test:
; CHECK-NOV: # %bb.0:
; CHECK-NOV-NEXT: li a0, 1
; CHECK-NOV-NEXT: ret
;
; CHECK-V-LABEL: test:
; CHECK-V: # %bb.0:
; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.v.x v8, a0
; CHECK-V-NEXT: lui a0, 524288
; CHECK-V-NEXT: vslide1down.vx v8, v8, a0
; CHECK-V-NEXT: addi a0, a0, 2
; CHECK-V-NEXT: vmslt.vx v0, v8, a0
; CHECK-V-NEXT: vmv.v.i v8, 0
; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
; CHECK-V-NEXT: vmv.x.s a0, v8
; CHECK-V-NEXT: ret
; Lane 0 <- %call.i; lane 1 keeps the constant -2147483648 (INT32_MIN).
%t2 = insertelement <2 x i32> <i32 poison, i32 -2147483648>, i32 %call.i, i64 0
; Signed compare of each lane against a splat of -2147483646 (INT32_MIN + 2).
%t3 = icmp slt <2 x i32> %t2, <i32 -2147483646, i32 -2147483646>
; zext (not sext) turns a true lane into 1; a sign-extended mask would give -1,
; which is the wrong result this test guards against.
%t4 = zext <2 x i1> %t3 to <2 x i32>
; Only lane 1 (the constant lane) is returned.
%t6 = extractelement <2 x i32> %t4, i64 1
ret i32 %t6
}