[SDAG] fold bitwise logic with shifted operands

LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z

https://alive2.llvm.org/ce/z/QmR9rR

This is a reassociation + factoring fold. The common shift operation is moved
after a bitwise logic op on 2 input operands.
We get simpler cases of these patterns in IR, but I suspect we would miss all
of these exact tests in IR too. We also handle the simpler form of this plus
several other folds in DAGCombiner::hoistLogicOpWithSameOpcodeHands().

This is a partial implementation of a transform suggested in D111530
(only handles 'or' bitwise logic as a first step - need to stamp out more
tests for other opcodes).
Several of the same tests added for D111530 are altered here (but not
fully optimized). I'm not sure yet if this would help/hinder that patch,
but this should be an improvement for all tests added with ecf606cb43
since it removes a shift operation in those examples.

Differential Revision: https://reviews.llvm.org/D120516
This commit is contained in:
Sanjay Patel
2022-02-27 08:33:43 -05:00
parent beb92af01b
commit acb96ffd14
7 changed files with 232 additions and 247 deletions

View File

@@ -6696,6 +6696,52 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
/// Reassociate and factor a bitwise logic node whose operand tree contains two
/// shifts with the same opcode and the same shift amount, so that only one
/// shift remains:
///   ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
///
/// \p N is the outer logic node, \p LogicOp is the operand of N that must be a
/// logic op with the same opcode as N, and \p ShiftOp is the operand of N that
/// must be a shift (SHL, SRL or SRA). Returns the replacement value, or an
/// empty SDValue when the pattern does not match.
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
                                 SelectionDAG &DAG) {
  // TODO: This should be extended to allow AND/XOR.
  assert(N->getOpcode() == ISD::OR && "Expected bitwise logic operation");

  // Each of the two operands must have exactly one use.
  if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
    return SDValue();

  // The inner logic op has to use the same opcode as the outer one.
  const unsigned OuterOpc = N->getOpcode();
  if (LogicOp.getOpcode() != OuterOpc)
    return SDValue();

  // The other operand has to be one of the three shift kinds.
  const unsigned ShOpc = ShiftOp.getOpcode();
  switch (ShOpc) {
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    break;
  default:
    return SDValue();
  }

  // Pattern names: ShiftOp is (SH X1, Y).
  SDValue X1 = ShiftOp.getOperand(0);
  SDValue Y = ShiftOp.getOperand(1);

  // True if V is a shift of the same kind by the same amount Y.
  auto IsSameShiftByY = [&](SDValue V) {
    return V.getOpcode() == ShOpc && V.getOperand(1) == Y;
  };

  // Find the second shift among the operands of the inner logic op; the
  // remaining operand becomes Z. Operand 0 is tried first, then operand 1:
  // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
  // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
  SDValue Inner0 = LogicOp.getOperand(0);
  SDValue Inner1 = LogicOp.getOperand(1);
  SDValue X0, Z;
  if (IsSameShiftByY(Inner0)) {
    X0 = Inner0.getOperand(0);
    Z = Inner1;
  } else if (IsSameShiftByY(Inner1)) {
    X0 = Inner1.getOperand(0);
    Z = Inner0;
  } else {
    return SDValue();
  }

  // Build LOGIC (SH (LOGIC X0, X1), Y), Z using N's type and debug location.
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue MergedSrc = DAG.getNode(OuterOpc, DL, VT, X0, X1);
  SDValue SingleShift = DAG.getNode(ShOpc, DL, VT, MergedSrc, Y);
  return DAG.getNode(OuterOpc, DL, VT, SingleShift, Z);
}
/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(
SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
@@ -6710,6 +6756,9 @@ static SDValue visitORCommutative(
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}
if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
return R;
auto peekThroughZext = [](SDValue V) {
if (V->getOpcode() == ISD::ZERO_EXTEND)
return V->getOperand(0);

View File

@@ -12,8 +12,8 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds x0, x0, #1
; CHECK-NEXT: adcs x1, x1, xzr
; CHECK-NEXT: extr x8, x1, x0, #60
; CHECK-NEXT: orr x8, x8, x1, lsr #60
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x1, x8, #60
; CHECK-NEXT: cbnz x8, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -32,8 +32,8 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #17
; CHECK-NEXT: orr x8, x8, x1, lsr #17
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x1, x8, #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -45,8 +45,8 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #17
; CHECK-NEXT: orr x8, x8, x1, lsr #17
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x1, x8, #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -58,8 +58,8 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #47
; CHECK-NEXT: orr x8, x8, x0, lsl #17
; CHECK-NEXT: orr x8, x1, x0
; CHECK-NEXT: extr x8, x8, x0, #47
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -71,8 +71,8 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x1, x0, #47
; CHECK-NEXT: orr x8, x8, x0, lsl #17
; CHECK-NEXT: orr x8, x1, x0
; CHECK-NEXT: extr x8, x8, x0, #47
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -106,8 +106,8 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x0, x1, #47
; CHECK-NEXT: orr x8, x8, x1, lsl #17
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: extr x8, x8, x1, #47
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -142,12 +142,12 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero_i256:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x8, x3, x2, #47
; CHECK-NEXT: extr x9, x2, x1, #47
; CHECK-NEXT: orr x8, x2, x0
; CHECK-NEXT: extr x9, x3, x2, #47
; CHECK-NEXT: extr x10, x1, x0, #47
; CHECK-NEXT: orr x9, x9, x0, lsl #17
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: orr x8, x9, x8
; CHECK-NEXT: extr x8, x8, x1, #47
; CHECK-NEXT: orr x9, x10, x9
; CHECK-NEXT: orr x8, x8, x9
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret

View File

@@ -4,13 +4,11 @@
define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: or_lshr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: and w9, w1, #0xff
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: lsr w9, w9, w2
; CHECK-NEXT: orr w8, w8, w3
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i8 %x0, %y
%sh2 = lshr i8 %x1, %y
@@ -22,10 +20,9 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: or_lshr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, w2
; CHECK-NEXT: lsr w9, w1, w2
; CHECK-NEXT: orr w8, w3, w8
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsr w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = lshr i32 %x0, %y
%sh2 = lshr i32 %x1, %y
@@ -38,10 +35,9 @@ define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <
; CHECK-LABEL: or_lshr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sh1 = lshr <8 x i16> %x0, %y
%sh2 = lshr <8 x i16> %x1, %y
@@ -54,10 +50,9 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
; CHECK-LABEL: or_lshr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = lshr <2 x i64> %x0, %y
%sh2 = lshr <2 x i64> %x1, %y
@@ -69,13 +64,11 @@ define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <
define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK-LABEL: or_ashr_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: sxth w9, w1
; CHECK-NEXT: sxth w8, w8
; CHECK-NEXT: asr w8, w8, w2
; CHECK-NEXT: asr w9, w9, w2
; CHECK-NEXT: orr w8, w8, w3
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = ashr i16 %x0, %y
%sh2 = ashr i16 %x1, %y
@@ -87,10 +80,9 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_ashr_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: asr x8, x0, x2
; CHECK-NEXT: asr x9, x1, x2
; CHECK-NEXT: orr x8, x3, x8
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: asr x8, x8, x2
; CHECK-NEXT: orr x0, x8, x3
; CHECK-NEXT: ret
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
@@ -103,10 +95,9 @@ define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <
; CHECK-LABEL: or_ashr_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: sshl v1.4s, v1.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sh1 = ashr <4 x i32> %x0, %y
%sh2 = ashr <4 x i32> %x1, %y
@@ -119,10 +110,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
; CHECK-LABEL: or_ashr_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: neg v2.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshl v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sshl v1.16b, v1.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = ashr <16 x i8> %x0, %y
%sh2 = ashr <16 x i8> %x1, %y
@@ -134,10 +124,9 @@ define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <
define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK-LABEL: or_shl_commute0:
; CHECK: // %bb.0:
; CHECK-NEXT: lsl w8, w0, w2
; CHECK-NEXT: lsl w9, w1, w2
; CHECK-NEXT: orr w8, w8, w3
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i32 %x0, %y
%sh2 = shl i32 %x1, %y
@@ -149,11 +138,10 @@ define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK-LABEL: or_shl_commute1:
; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT: lsl w8, w0, w2
; CHECK-NEXT: lsl w9, w1, w2
; CHECK-NEXT: orr w8, w3, w8
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: lsl w8, w8, w2
; CHECK-NEXT: orr w0, w8, w3
; CHECK-NEXT: ret
%sh1 = shl i8 %x0, %y
%sh2 = shl i8 %x1, %y
@@ -165,10 +153,9 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_shl_commute2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ushl v1.2d, v1.2d, v2.2d
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%sh1 = shl <2 x i64> %x0, %y
%sh2 = shl <2 x i64> %x1, %y
@@ -180,10 +167,9 @@ define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2
define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_shl_commute3:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushl v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ushl v1.8h, v1.8h, v2.8h
; CHECK-NEXT: orr v0.16b, v3.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: ret
%sh1 = shl <8 x i16> %x0, %y
%sh2 = shl <8 x i16> %x1, %y
@@ -192,6 +178,8 @@ define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8
ret <8 x i16> %r
}
; negative test - mismatched shift opcodes
define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_mix_shr:
; CHECK: // %bb.0:
@@ -207,6 +195,8 @@ define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
ret i64 %r
}
; negative test - mixed shift amounts
define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
; CHECK-LABEL: or_lshr_mix_shift_amount:
; CHECK: // %bb.0:
@@ -222,6 +212,8 @@ define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
ret i64 %r
}
; negative test - mismatched logic opcodes
define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: mix_logic_lshr:
; CHECK: // %bb.0:

View File

@@ -630,14 +630,14 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8]
; CHECKV7M-NEXT: beq .LBB6_2
; CHECKV7M-NEXT: @ %bb.1: @ %then
; CHECKV7M-NEXT: orrs r2, r3
; CHECKV7M-NEXT: lsrs r2, r2, #17
; CHECKV7M-NEXT: orr.w r2, r2, r3, lsl #15
; CHECKV7M-NEXT: orr.w r2, r2, r3, lsr #17
; CHECKV7M-NEXT: lsr.w r3, r12, #17
; CHECKV7M-NEXT: orr.w r3, r3, r1, lsl #15
; CHECKV7M-NEXT: orr.w r3, r12, r1
; CHECKV7M-NEXT: cmp r2, #0
; CHECKV7M-NEXT: mov r2, r0
; CHECKV7M-NEXT: orr.w r1, r3, r1, lsr #17
; CHECKV7M-NEXT: lsr.w r3, r3, #17
; CHECKV7M-NEXT: orr.w r1, r3, r1, lsl #15
; CHECKV7M-NEXT: it ne
; CHECKV7M-NEXT: movne r2, lr
; CHECKV7M-NEXT: cmp r1, #0
@@ -646,9 +646,9 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7M-NEXT: add r0, r2
; CHECKV7M-NEXT: pop {r7, pc}
; CHECKV7M-NEXT: .LBB6_2: @ %else
; CHECKV7M-NEXT: lsrs r1, r2, #17
; CHECKV7M-NEXT: orr.w r1, r2, r3
; CHECKV7M-NEXT: lsrs r1, r1, #17
; CHECKV7M-NEXT: orr.w r1, r1, r3, lsl #15
; CHECKV7M-NEXT: orr.w r1, r1, r3, lsr #17
; CHECKV7M-NEXT: cmp r1, #0
; CHECKV7M-NEXT: it ne
; CHECKV7M-NEXT: movne r0, lr
@@ -664,14 +664,14 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: lsls r4, r4, #31
; CHECKV7A-NEXT: beq .LBB6_2
; CHECKV7A-NEXT: @ %bb.1: @ %then
; CHECKV7A-NEXT: orrs r2, r3
; CHECKV7A-NEXT: lsrs r2, r2, #17
; CHECKV7A-NEXT: orr.w r2, r2, r3, lsl #15
; CHECKV7A-NEXT: orr.w r2, r2, r3, lsr #17
; CHECKV7A-NEXT: lsr.w r3, r12, #17
; CHECKV7A-NEXT: orr.w r3, r3, r1, lsl #15
; CHECKV7A-NEXT: orr.w r3, r12, r1
; CHECKV7A-NEXT: cmp r2, #0
; CHECKV7A-NEXT: mov r2, r0
; CHECKV7A-NEXT: orr.w r1, r3, r1, lsr #17
; CHECKV7A-NEXT: lsr.w r3, r3, #17
; CHECKV7A-NEXT: orr.w r1, r3, r1, lsl #15
; CHECKV7A-NEXT: it ne
; CHECKV7A-NEXT: movne r2, lr
; CHECKV7A-NEXT: cmp r1, #0
@@ -680,9 +680,9 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: add r0, r2
; CHECKV7A-NEXT: pop {r4, pc}
; CHECKV7A-NEXT: .LBB6_2: @ %else
; CHECKV7A-NEXT: lsrs r1, r2, #17
; CHECKV7A-NEXT: orr.w r1, r2, r3
; CHECKV7A-NEXT: lsrs r1, r1, #17
; CHECKV7A-NEXT: orr.w r1, r1, r3, lsl #15
; CHECKV7A-NEXT: orr.w r1, r1, r3, lsr #17
; CHECKV7A-NEXT: cmp r1, #0
; CHECKV7A-NEXT: it ne
; CHECKV7A-NEXT: movne r0, lr

View File

@@ -12,9 +12,9 @@ define i64 @opt_setcc_lt_power_of_2(i64 %a) nounwind {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r0, r0, #1
; CHECK-NEXT: adc r1, r1, #0
; CHECK-NEXT: lsr r2, r0, #16
; CHECK-NEXT: orr r2, r2, r1, lsl #16
; CHECK-NEXT: orr r2, r2, r1, lsr #16
; CHECK-NEXT: orr r2, r0, r1
; CHECK-NEXT: uxth r3, r1
; CHECK-NEXT: orr r2, r3, r2, lsr #16
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %exit
@@ -34,9 +34,9 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: lsr r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsl #15
; CHECK-NEXT: orr r0, r0, r1, lsr #17
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr
@@ -48,9 +48,9 @@ define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: lsr r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsl #15
; CHECK-NEXT: orr r0, r0, r1, lsr #17
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
@@ -62,9 +62,9 @@ define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r1, r1, r0
; CHECK-NEXT: lsl r1, r1, #17
; CHECK-NEXT: orr r1, r1, r0, lsr #15
; CHECK-NEXT: orr r0, r1, r0, lsl #17
; CHECK-NEXT: orr r0, r1, r0, lsr #15
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr
@@ -76,9 +76,9 @@ define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r1, r1, r0
; CHECK-NEXT: lsl r1, r1, #17
; CHECK-NEXT: orr r1, r1, r0, lsr #15
; CHECK-NEXT: orr r0, r1, r0, lsl #17
; CHECK-NEXT: orr r0, r1, r0, lsr #15
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
@@ -113,9 +113,9 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i64 %a) nounwind {
define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, r1
; CHECK-NEXT: lsl r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsr #15
; CHECK-NEXT: orr r0, r0, r1, lsl #17
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr
@@ -154,11 +154,11 @@ define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind {
; CHECK-NEXT: lsl r3, r3, #17
; CHECK-NEXT: orr r12, r3, r2, lsr #15
; CHECK-NEXT: lsl r3, r1, #17
; CHECK-NEXT: lsl r2, r2, #17
; CHECK-NEXT: orr r3, r3, r0, lsr #15
; CHECK-NEXT: orr r1, r2, r1, lsr #15
; CHECK-NEXT: orr r0, r2, r0
; CHECK-NEXT: orr r3, r3, r12
; CHECK-NEXT: orr r0, r1, r0, lsl #17
; CHECK-NEXT: lsl r0, r0, #17
; CHECK-NEXT: orr r0, r0, r1, lsr #15
; CHECK-NEXT: orrs r0, r0, r3
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr

View File

@@ -13,34 +13,30 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB0_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: addl $1, %ecx
; X86-NEXT: addl $1, %edi
; X86-NEXT: adcl $0, %esi
; X86-NEXT: adcl $0, %edx
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl $4, %edx, %edi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: shldl $4, %esi, %ebp
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: shrl $28, %ecx
; X86-NEXT: orl %ebp, %ecx
; X86-NEXT: orl %edi, %ecx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: shldl $4, %edx, %ebx
; X86-NEXT: movl %esi, %ebp
; X86-NEXT: orl %ecx, %ebp
; X86-NEXT: shrdl $28, %edx, %ebp
; X86-NEXT: orl %ebx, %ebp
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %exit
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ebx, 12(%eax)
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -56,11 +52,9 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: addq $1, %rax
; X64-NEXT: adcq $0, %rdx
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: shldq $4, %rax, %rcx
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: shrq $60, %rsi
; X64-NEXT: orq %rcx, %rsi
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: orq %rdx, %rcx
; X64-NEXT: shrdq $60, %rdx, %rcx
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %exit
; X64-NEXT: retq
@@ -79,30 +73,25 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: shldl $15, %edx, %edi
; X86-NEXT: shldl $15, %ecx, %edx
; X86-NEXT: shrdl $17, %ecx, %eax
; X86-NEXT: orl %edi, %eax
; X86-NEXT: shrl $17, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: orl %eax, %esi
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: shldl $15, %edx, %esi
; X86-NEXT: orl %esi, %eax
; X86-NEXT: shrdl $17, %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: shrq $17, %rsi
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
%srl = lshr i128 %a, 17
@@ -113,30 +102,25 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: shldl $15, %edx, %edi
; X86-NEXT: shldl $15, %ecx, %edx
; X86-NEXT: shrdl $17, %ecx, %eax
; X86-NEXT: orl %edi, %eax
; X86-NEXT: shrl $17, %esi
; X86-NEXT: orl %edx, %esi
; X86-NEXT: orl %eax, %esi
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: shldl $15, %edx, %esi
; X86-NEXT: orl %esi, %eax
; X86-NEXT: shrdl $17, %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: setne %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shrdq $17, %rsi, %rdi
; X64-NEXT: shrq $17, %rsi
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: setne %al
; X64-NEXT: retq
%srl = lshr i128 %a, 17
@@ -148,26 +132,24 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %esi, %edx
; X86-NEXT: shldl $17, %ecx, %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: shll $17, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: shlq $17, %rdi
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl = shl i128 %a, 17
@@ -179,26 +161,24 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %esi, %edx
; X86-NEXT: shldl $17, %ecx, %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: shll $17, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: setne %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
; X64: # %bb.0:
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: shldq $17, %rdi, %rsi
; X64-NEXT: shlq $17, %rdi
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: setne %al
; X64-NEXT: retq
%shl = shl i128 %a, 17
@@ -262,27 +242,21 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: shldl $17, %edx, %esi
; X86-NEXT: orl %eax, %edx
; X86-NEXT: shldl $17, %ecx, %edx
; X86-NEXT: shldl $17, %eax, %ecx
; X86-NEXT: shll $17, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X64: # %bb.0:
; X64-NEXT: orq %rsi, %rdi
; X64-NEXT: shldq $17, %rsi, %rdi
; X64-NEXT: shlq $17, %rsi
; X64-NEXT: orq %rdi, %rsi
; X64-NEXT: sete %al
; X64-NEXT: retq
%shl.a = shl i64 %a, 17

View File

@@ -6,10 +6,9 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrb %cl, %dil
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrb %cl, %sil
; CHECK-NEXT: orb %sil, %al
; CHECK-NEXT: shrb %cl, %dil
; CHECK-NEXT: orb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
@@ -25,11 +24,10 @@ define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrl %cl, %edi
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: orl %edi, %esi
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: shrl %cl, %edi
; CHECK-NEXT: orl %edi, %eax
; CHECK-NEXT: retq
%sh1 = lshr i32 %x0, %y
%sh2 = lshr i32 %x1, %y
@@ -41,17 +39,13 @@ define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_lshr_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm4
; CHECK-NEXT: vpackusdw %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%sh1 = lshr <8 x i16> %x0, %y
@@ -64,10 +58,9 @@ define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <
define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_lshr_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsrlvq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = lshr <2 x i64> %x0, %y
%sh2 = lshr <2 x i64> %x1, %y
@@ -81,13 +74,11 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %r8d
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: movswl %si, %eax
; CHECK-NEXT: movswl %di, %edx
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %eax
; CHECK-NEXT: orl %r8d, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%sh1 = ashr i16 %x0, %y
@@ -102,11 +93,10 @@ define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sarq %cl, %rdi
; CHECK-NEXT: orq %rsi, %rdi
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NEXT: sarq %cl, %rsi
; CHECK-NEXT: orq %rdi, %rsi
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: sarq %cl, %rdi
; CHECK-NEXT: orq %rdi, %rax
; CHECK-NEXT: retq
%sh1 = ashr i64 %x0, %y
%sh2 = ashr i64 %x1, %y
@@ -118,10 +108,9 @@ define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: or_ashr_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsravd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsravd %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = ashr <4 x i32> %x0, %y
%sh2 = ashr <4 x i32> %x1, %y
@@ -133,49 +122,32 @@ define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <
define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
; CHECK-LABEL: or_ashr_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm4, %xmm5
; CHECK-NEXT: vpsllw $5, %xmm2, %xmm2
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsraw $2, %xmm4, %xmm5
; CHECK-NEXT: vpaddw %xmm6, %xmm6, %xmm7
; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsraw $1, %xmm4, %xmm5
; CHECK-NEXT: vpaddw %xmm7, %xmm7, %xmm8
; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vpsrlw $8, %xmm4, %xmm9
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm5
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm10
; CHECK-NEXT: vpblendvb %xmm10, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm9, %xmm0, %xmm9
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm5, %xmm0
; CHECK-NEXT: vpblendvb %xmm6, %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm5
; CHECK-NEXT: vpblendvb %xmm7, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm5
; CHECK-NEXT: vpblendvb %xmm8, %xmm5, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsraw $4, %xmm1, %xmm5
; CHECK-NEXT: vpblendvb %xmm2, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm2
; CHECK-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm2
; CHECK-NEXT: vpblendvb %xmm10, %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $2, %xmm1, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsraw $1, %xmm1, %xmm5
; CHECK-NEXT: vpaddw %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
; CHECK-NEXT: vpsrlw $8, %xmm1, %xmm1
; CHECK-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpor %xmm0, %xmm9, %xmm0
; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; CHECK-NEXT: vpsraw $4, %xmm0, %xmm4
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $2, %xmm0, %xmm4
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsraw $1, %xmm0, %xmm4
; CHECK-NEXT: vpaddw %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpsrlw $8, %xmm0, %xmm0
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = ashr <16 x i8> %x0, %y
%sh2 = ashr <16 x i8> %x1, %y
@@ -189,10 +161,9 @@ define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shll %cl, %edi
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: shll %cl, %edi
; CHECK-NEXT: orl %edi, %eax
; CHECK-NEXT: retq
%sh1 = shl i32 %x0, %y
@@ -207,11 +178,10 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shlb %cl, %dil
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shlb %cl, %sil
; CHECK-NEXT: orb %dil, %sil
; CHECK-NEXT: orb %sil, %al
; CHECK-NEXT: shlb %cl, %dil
; CHECK-NEXT: orb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%sh1 = shl i8 %x0, %y
@@ -224,10 +194,9 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
; CHECK-LABEL: or_shl_commute2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpsllvq %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm3, %xmm1
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpsllvq %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: retq
%sh1 = shl <2 x i64> %x0, %y
%sh2 = shl <2 x i64> %x1, %y
@@ -239,18 +208,13 @@ define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2
define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
; CHECK-LABEL: or_shl_commute3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; CHECK-NEXT: vpshufb %ymm4, %ymm0, %ymm0
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpsllvd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpshufb %ymm4, %ymm1, %ymm1
; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: vpor %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%sh1 = shl <8 x i16> %x0, %y
@@ -260,6 +224,8 @@ define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8
ret <8 x i16> %r
}
; negative test - mismatched shift opcodes
define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: or_mix_shr:
; CHECK: # %bb.0:
@@ -278,6 +244,8 @@ define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
ret i64 %r
}
; negative test - mismatched shift amounts
define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
; CHECK-LABEL: or_lshr_mix_shift_amount:
; CHECK: # %bb.0:
@@ -297,6 +265,8 @@ define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
ret i64 %r
}
; negative test - mismatched logic opcodes
define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
; CHECK-LABEL: mix_logic_lshr:
; CHECK: # %bb.0: