This PR resolves https://github.com/llvm/llvm-project/issues/144513 The modification covers five patterns: 1. vselect Cond, 0, 0 → 0 2. vselect Cond, -1, 0 → bitcast Cond 3. vselect Cond, -1, x → or Cond, x 4. vselect Cond, x, 0 → and Cond, x 5. vselect Cond, 000..., X → andn Cond, X. Patterns 1-4 have been migrated to DAGCombine; pattern 5 remains in the x86 code. The reason is that the andn instruction cannot be used directly in DAGCombine (only and+xor can), which would introduce optimization-order issues. For example, in the x86 backend, for select Cond, 0, x → (~Cond) & x, the backend first checks whether the Cond node of (~Cond) is a setcc node. If so, it modifies the comparison operator of the condition, so the x86 backend can no longer complete the andn optimization. In short, I think it is better to keep the vselect Cond, 000..., X pattern rather than emit and+xor in DAGCombine. As for the commits, the first contains the code changes and the x86 tests (note 1), and the second contains the tests for the other backends (note 2). --------- Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
85 lines
3.2 KiB
LLVM
85 lines
3.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
|
|
|
|
; select_v16i8_imm: load a <16 x i8> vector from %a0, then select per lane
; between an all-ones constant (lanes whose condition is true: the odd lanes)
; and the loaded value (even lanes), and store the result to %res.
; Because the "true" operand is all-ones, the expected code ORs the loaded
; vector with a replicated mask constant (vrepli.h -256 + vor.v) rather than
; emitting a bit-select.
define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
|
|
; CHECK-LABEL: select_v16i8_imm:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vld $vr0, $a1, 0
|
|
; CHECK-NEXT: vrepli.h $vr1, -256
|
|
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
|
|
; CHECK-NEXT: vst $vr0, $a0, 0
|
|
; CHECK-NEXT: ret
|
|
%v0 = load <16 x i8>, ptr %a0
|
|
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %v0
|
|
store <16 x i8> %sel, ptr %res
|
|
ret void
|
|
}
|
|
|
|
; select_v16i8: load two <16 x i8> vectors from %a0 and %a1, select per lane
; with a constant alternating condition (odd lanes take the %a0 value, even
; lanes take the %a1 value), and store the result to %res.
; The expected code materializes the lane mask with a single vrepli.h
; immediate and performs the select with vbitsel.v.
define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
|
|
; CHECK-LABEL: select_v16i8:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vld $vr0, $a1, 0
|
|
; CHECK-NEXT: vld $vr1, $a2, 0
|
|
; CHECK-NEXT: vrepli.h $vr2, -256
|
|
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
|
|
; CHECK-NEXT: vst $vr0, $a0, 0
|
|
; CHECK-NEXT: ret
|
|
%v0 = load <16 x i8>, ptr %a0
|
|
%v1 = load <16 x i8>, ptr %a1
|
|
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> %v1
|
|
store <16 x i8> %sel, ptr %res
|
|
ret void
|
|
}
|
|
|
|
; select_v8i16: load two <8 x i16> vectors from %a0 and %a1, select per lane
; with a constant alternating condition (odd lanes take the %a0 value, even
; lanes take the %a1 value), and store the result to %res.
; The expected code builds the lane mask in a general-purpose register
; (lu12i.w), replicates it across 32-bit elements (vreplgr2vr.w), and performs
; the select with vbitsel.v.
define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
|
|
; CHECK-LABEL: select_v8i16:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vld $vr0, $a1, 0
|
|
; CHECK-NEXT: vld $vr1, $a2, 0
|
|
; CHECK-NEXT: lu12i.w $a1, -16
|
|
; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
|
|
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
|
|
; CHECK-NEXT: vst $vr0, $a0, 0
|
|
; CHECK-NEXT: ret
|
|
%v0 = load <8 x i16>, ptr %a0
|
|
%v1 = load <8 x i16>, ptr %a1
|
|
%sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i16> %v0, <8 x i16> %v1
|
|
store <8 x i16> %sel, ptr %res
|
|
ret void
|
|
}
|
|
|
|
; select_v4i32: load two <4 x i32> vectors from %a0 and %a1, select per lane
; with a constant alternating condition (odd lanes take the %a0 value, even
; lanes take the %a1 value), and store the result to %res.
; The expected code builds the lane mask in a general-purpose register
; (ori + lu32i.d), replicates it across 64-bit elements (vreplgr2vr.d), and
; performs the select with vbitsel.v.
define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
|
|
; CHECK-LABEL: select_v4i32:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vld $vr0, $a1, 0
|
|
; CHECK-NEXT: vld $vr1, $a2, 0
|
|
; CHECK-NEXT: ori $a1, $zero, 0
|
|
; CHECK-NEXT: lu32i.d $a1, -1
|
|
; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
|
|
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
|
|
; CHECK-NEXT: vst $vr0, $a0, 0
|
|
; CHECK-NEXT: ret
|
|
%v0 = load <4 x i32>, ptr %a0
|
|
%v1 = load <4 x i32>, ptr %a1
|
|
%sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
|
|
store <4 x i32> %sel, ptr %res
|
|
ret void
|
|
}
|
|
|
|
; select_v2i64: load two <2 x i64> vectors from %a0 and %a1, select per lane
; with the constant condition <true, false> (lane 0 takes the %a0 value,
; lane 1 takes the %a1 value), and store the result to %res.
; Unlike the narrower element types in this file, the expected code loads the
; lane mask from a constant-pool entry (.LCPI4_0) before the vbitsel.v.
define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
|
|
; CHECK-LABEL: select_v2i64:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: vld $vr0, $a1, 0
|
|
; CHECK-NEXT: vld $vr1, $a2, 0
|
|
; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0)
|
|
; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0)
|
|
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
|
|
; CHECK-NEXT: vst $vr0, $a0, 0
|
|
; CHECK-NEXT: ret
|
|
%v0 = load <2 x i64>, ptr %a0
|
|
%v1 = load <2 x i64>, ptr %a1
|
|
%sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v0, <2 x i64> %v1
|
|
store <2 x i64> %sel, ptr %res
|
|
ret void
|
|
}
|