[AArch64][SVE] Add patterns for bit-select instructions. (#138689)
This patch adds patterns to select SVE2 bit-select instructions such as BSL
from DAGs of the form (or (and a, c), (and b, (vnot c))) and other similar patterns. For
example:
```cpp
svuint64_t bsl(svuint64_t a, svuint64_t b, svuint64_t c) {
return (a & c) | (b & ~c);
}
```
Currently:
```gas
bsl:
and z0.d, z2.d, z0.d
bic z1.d, z1.d, z2.d
orr z0.d, z0.d, z1.d
ret
```
Becomes:
```gas
bsl:
bsl z0.d, z0.d, z1.d, z2.d
ret
```
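
The same applies to the related NBSL/BSL1N/BSL2N forms. As an illustrative sketch (not part of the patch), the corresponding C++ sources below use the same style as the example above — operators on SVE vector types, compiled with SVE2 enabled (e.g. `-march=armv8-a+sve2`) — and are each expected to select a single `nbsl`, `bsl1n`, or `bsl2n`, matching the macros exercised by the new tests:
```cpp
// Illustrative only: the remaining bit-select forms covered by this patch.
#include <arm_sve.h>

// NBSL: inverted bit-select -> expected to become a single "nbsl".
svuint64_t nbsl(svuint64_t a, svuint64_t b, svuint64_t c) {
  return ~((a & c) | (b & ~c));
}

// BSL1N: first operand inverted -> expected to become a single "bsl1n".
svuint64_t bsl1n(svuint64_t a, svuint64_t b, svuint64_t c) {
  return (~a & c) | (b & ~c);
}

// BSL2N: second operand inverted -> expected to become a single "bsl2n".
svuint64_t bsl2n(svuint64_t a, svuint64_t b, svuint64_t c) {
  return (a & c) | (~b & ~c);
}
```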
```diff
@@ -21997,6 +21997,30 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
   return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
 }
 
+static SDValue combineSVEBitSel(unsigned IID, SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue Op2 = N->getOperand(2);
+  SDValue Op3 = N->getOperand(3);
+
+  switch (IID) {
+  default:
+    llvm_unreachable("Called with wrong intrinsic!");
+  case Intrinsic::aarch64_sve_bsl:
+    return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2);
+  case Intrinsic::aarch64_sve_bsl1n:
+    return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, DAG.getNOT(DL, Op1, VT),
+                       Op2);
+  case Intrinsic::aarch64_sve_bsl2n:
+    return DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1,
+                       DAG.getNOT(DL, Op2, VT));
+  case Intrinsic::aarch64_sve_nbsl:
+    return DAG.getNOT(DL, DAG.getNode(AArch64ISD::BSP, DL, VT, Op3, Op1, Op2),
+                      VT);
+  }
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -22319,6 +22343,11 @@ static SDValue performIntrinsicCombine(SDNode *N,
                            AArch64CC::LAST_ACTIVE);
   case Intrinsic::aarch64_sve_whilelo:
     return tryCombineWhileLo(N, DCI, Subtarget);
+  case Intrinsic::aarch64_sve_bsl:
+  case Intrinsic::aarch64_sve_bsl1n:
+  case Intrinsic::aarch64_sve_bsl2n:
+  case Intrinsic::aarch64_sve_nbsl:
+    return combineSVEBitSel(IID, N, DAG);
   }
   return SDValue();
 }
```
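
Note the operand order: the combine builds `BSP(Op3, Op1, Op2)`, i.e. the selector mask goes first, and the 1n/2n/n variants are expressed purely as `BSP` plus `getNOT`. As a sanity check of that algebra — a minimal scalar C++ model, not LLVM API code — the identities each case relies on can be verified directly:
```cpp
// Minimal scalar reference model (assumption: plain C++ illustration) of the
// identities used by combineSVEBitSel, on 64-bit lanes.
#include <cassert>
#include <cstdint>

// BSP(mask, a, b): bits of a where mask is 1, bits of b where mask is 0.
static uint64_t bsp(uint64_t mask, uint64_t a, uint64_t b) {
  return (a & mask) | (b & ~mask);
}

int main() {
  const uint64_t a = 0x0123456789abcdefULL;
  const uint64_t b = 0xfedcba9876543210ULL;
  const uint64_t c = 0x00ff00ff00ff00ffULL;

  assert(((a & c) | (b & ~c)) == bsp(c, a, b));      // bsl   == BSP(c, a, b)
  assert(((~a & c) | (b & ~c)) == bsp(c, ~a, b));     // bsl1n == BSP(c, ~a, b)
  assert(((a & c) | (~b & ~c)) == bsp(c, a, ~b));     // bsl2n == BSP(c, a, ~b)
  assert(~((a & c) | (b & ~c)) == ~bsp(c, a, b));     // nbsl  == ~BSP(c, a, b)
  return 0;
}
```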
```diff
@@ -545,12 +545,18 @@ def AArch64umulh : PatFrag<(ops node:$op1, node:$op2),
 
 def AArch64bsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
-                          [(int_aarch64_sve_bsl node:$Op1, node:$Op2, node:$Op3),
-                           (AArch64bsp node:$Op3, node:$Op1, node:$Op2)]>;
+                          [(AArch64bsp node:$Op3, node:$Op1, node:$Op2),
+                           (or (and node:$Op1, node:$Op3), (and node:$Op2, (vnot node:$Op3)))]>;
 
-def AArch64nbsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
-                           [(int_aarch64_sve_nbsl node:$Op1, node:$Op2, node:$Op3),
-                            (vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2))]>;
+def AArch64bsl1n : PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
+                           (AArch64bsl (vnot node:$Op1), node:$Op2, node:$Op3)>;
+
+def AArch64bsl2n : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
+                            [(AArch64bsl node:$Op1, (vnot node:$Op2), node:$Op3),
+                             (or (and node:$Op1, node:$Op3), (vnot (or node:$Op2, node:$Op3)))]>;
+
+def AArch64nbsl : PatFrag<(ops node:$Op1, node:$Op2, node:$Op3),
+                          (vnot (AArch64bsl node:$Op1, node:$Op2, node:$Op3))>;
 
 let Predicates = [HasSVE] in {
@@ -3934,8 +3940,8 @@ let Predicates = [HasSVE2_or_SME] in {
   defm EOR3_ZZZZ  : sve2_int_bitwise_ternary_op<0b000, "eor3",  AArch64eor3>;
   defm BCAX_ZZZZ  : sve2_int_bitwise_ternary_op<0b010, "bcax",  AArch64bcax>;
   defm BSL_ZZZZ   : sve2_int_bitwise_ternary_op<0b001, "bsl",   AArch64bsl>;
-  defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
-  defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
+  defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", AArch64bsl1n>;
+  defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", AArch64bsl2n>;
   defm NBSL_ZZZZ  : sve2_int_bitwise_ternary_op<0b111, "nbsl",  AArch64nbsl>;
 
   // SVE2 bitwise xor and rotate right by immediate
```
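
The AArch64bsl2n PatFrag carries a second pattern because, as the new `codegen_bsl2n_*` tests show, the IR reaching instruction selection is typically already in the form `(x & z) | ~(y | z)` rather than `(x & z) | (~y & ~z)`; the two are equivalent by De Morgan's law. A minimal, illustrative check of that equivalence (plain C++, not part of the patch):
```cpp
// Exhaustively verify over 8-bit values that the De Morgan-rewritten form
// matched by the second AArch64bsl2n pattern equals the BSL2N macro form:
//   (x & z) | (~y & ~z)  ==  (x & z) | ~(y | z)
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned z = 0; z < 256; ++z) {
        uint8_t lhs = (x & z) | (~y & ~z);
        uint8_t rhs = (x & z) | ~(y | z);
        assert(lhs == rhs);
      }
  return 0;
}
```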
```diff
@@ -93,3 +93,209 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)
   ret <vscale x 2 x i64> %4
 }
+
+; Test BSL/NBSL/BSL1N/BSL2N code generation for:
+;   #define BSL(x,y,z)   ( ((x) & (z)) | ( (y) & ~(z)))
+;   #define NBSL(x,y,z)  (~(((x) & (z)) | ( (y) & ~(z))))
+;   #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z)))
+;   #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z)))
+
+define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %6 = and <vscale x 16 x i8> %1, %5
+  %7 = or <vscale x 16 x i8> %4, %6
+  ret <vscale x 16 x i8> %7
+}
+
+define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_nbsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %6 = and <vscale x 16 x i8> %1, %5
+  %7 = or <vscale x 16 x i8> %4, %6
+  %8 = xor <vscale x 16 x i8> %7, splat (i8 -1)
+  ret <vscale x 16 x i8> %8
+}
+
+define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl1n_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 16 x i8> %0, splat (i8 -1)
+  %5 = and <vscale x 16 x i8> %2, %4
+  %6 = xor <vscale x 16 x i8> %2, splat (i8 -1)
+  %7 = and <vscale x 16 x i8> %1, %6
+  %8 = or <vscale x 16 x i8> %5, %7
+  ret <vscale x 16 x i8> %8
+}
+
+define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
+; CHECK-LABEL: codegen_bsl2n_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 16 x i8> %2, %0
+  %5 = or <vscale x 16 x i8> %2, %1
+  %6 = xor <vscale x 16 x i8> %5, splat (i8 -1)
+  %7 = or <vscale x 16 x i8> %4, %6
+  ret <vscale x 16 x i8> %7
+}
+
+define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %6 = and <vscale x 8 x i16> %1, %5
+  %7 = or <vscale x 8 x i16> %4, %6
+  ret <vscale x 8 x i16> %7
+}
+
+define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_nbsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %6 = and <vscale x 8 x i16> %1, %5
+  %7 = or <vscale x 8 x i16> %4, %6
+  %8 = xor <vscale x 8 x i16> %7, splat (i16 -1)
+  ret <vscale x 8 x i16> %8
+}
+
+define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl1n_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 8 x i16> %0, splat (i16 -1)
+  %5 = and <vscale x 8 x i16> %2, %4
+  %6 = xor <vscale x 8 x i16> %2, splat (i16 -1)
+  %7 = and <vscale x 8 x i16> %1, %6
+  %8 = or <vscale x 8 x i16> %5, %7
+  ret <vscale x 8 x i16> %8
+}
+
+define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
+; CHECK-LABEL: codegen_bsl2n_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 8 x i16> %2, %0
+  %5 = or <vscale x 8 x i16> %2, %1
+  %6 = xor <vscale x 8 x i16> %5, splat (i16 -1)
+  %7 = or <vscale x 8 x i16> %4, %6
+  ret <vscale x 8 x i16> %7
+}
+
+define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %6 = and <vscale x 4 x i32> %1, %5
+  %7 = or <vscale x 4 x i32> %4, %6
+  ret <vscale x 4 x i32> %7
+}
+
+define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_nbsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %6 = and <vscale x 4 x i32> %1, %5
+  %7 = or <vscale x 4 x i32> %4, %6
+  %8 = xor <vscale x 4 x i32> %7, splat (i32 -1)
+  ret <vscale x 4 x i32> %8
+}
+
+define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl1n_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 4 x i32> %0, splat (i32 -1)
+  %5 = and <vscale x 4 x i32> %2, %4
+  %6 = xor <vscale x 4 x i32> %2, splat (i32 -1)
+  %7 = and <vscale x 4 x i32> %1, %6
+  %8 = or <vscale x 4 x i32> %5, %7
+  ret <vscale x 4 x i32> %8
+}
+
+define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
+; CHECK-LABEL: codegen_bsl2n_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 4 x i32> %2, %0
+  %5 = or <vscale x 4 x i32> %2, %1
+  %6 = xor <vscale x 4 x i32> %5, splat (i32 -1)
+  %7 = or <vscale x 4 x i32> %4, %6
+  ret <vscale x 4 x i32> %7
+}
+
+define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %6 = and <vscale x 2 x i64> %1, %5
+  %7 = or <vscale x 2 x i64> %4, %6
+  ret <vscale x 2 x i64> %7
+}
+
+define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_nbsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    nbsl z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %6 = and <vscale x 2 x i64> %1, %5
+  %7 = or <vscale x 2 x i64> %4, %6
+  %8 = xor <vscale x 2 x i64> %7, splat (i64 -1)
+  ret <vscale x 2 x i64> %8
+}
+
+define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl1n_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl1n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = xor <vscale x 2 x i64> %0, splat (i64 -1)
+  %5 = and <vscale x 2 x i64> %2, %4
+  %6 = xor <vscale x 2 x i64> %2, splat (i64 -1)
+  %7 = and <vscale x 2 x i64> %1, %6
+  %8 = or <vscale x 2 x i64> %5, %7
+  ret <vscale x 2 x i64> %8
+}
+
+define <vscale x 2 x i64> @codegen_bsl2n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
+; CHECK-LABEL: codegen_bsl2n_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl2n z0.d, z0.d, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %4 = and <vscale x 2 x i64> %2, %0
+  %5 = or <vscale x 2 x i64> %2, %1
+  %6 = xor <vscale x 2 x i64> %5, splat (i64 -1)
+  %7 = or <vscale x 2 x i64> %4, %6
+  ret <vscale x 2 x i64> %7
+}
```