Files
clang-p2996/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
woruyu bbcebec3af [DAG] Refactor X86 combineVSelectWithAllOnesOrZeros fold into a generic DAG Combine (#145298)
This PR resolves https://github.com/llvm/llvm-project/issues/144513

The modification include five pattern :
1.vselect Cond, 0, 0 → 0
2.vselect Cond, -1, 0 → bitcast Cond
3.vselect Cond, -1, x → or Cond, x
4.vselect Cond, x, 0 → and Cond, x
5.vselect Cond, 000..., X -> andn Cond, X

1-4 have been migrated to DAGCombine. 5 still in x86 code.

The reason is that you cannot use the andn instruction directly in
DAGCombine, you can only use and+xor, which will introduce optimization
order issues. For example, in the x86 backend, select Cond, 0, x →
(~Cond) & x, the backend will first check whether the cond node of
(~Cond) is a setcc node. If so, it will modify the comparison operator
of the condition.So the x86 backend cannot complete the optimization of
andn.In short, I think it is a better choice to keep the pattern of
vselect Cond, 000..., X instead of and+xor in combineDAG.

For commit, the first is code changes and x86 test(note 1), the second
is tests in other backend(node 2).

---------

Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
2025-07-02 15:07:48 +01:00

85 lines
3.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: select_v16i8_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vrepli.h $vr1, -256
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %a0
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %v0
store <16 x i8> %sel, ptr %res
ret void
}
define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vrepli.h $vr2, -256
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %a0
%v1 = load <16 x i8>, ptr %a1
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> %v1
store <16 x i8> %sel, ptr %res
ret void
}
define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: lu12i.w $a1, -16
; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %a0
%v1 = load <8 x i16>, ptr %a1
%sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i16> %v0, <8 x i16> %v1
store <8 x i16> %sel, ptr %res
ret void
}
define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: ori $a1, $zero, 0
; CHECK-NEXT: lu32i.d $a1, -1
; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %a0
%v1 = load <4 x i32>, ptr %a1
%sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
store <4 x i32> %sel, ptr %res
ret void
}
define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-LABEL: select_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <2 x i64>, ptr %a0
%v1 = load <2 x i64>, ptr %a1
%sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v0, <2 x i64> %v1
store <2 x i64> %sel, ptr %res
ret void
}