[InstCombine] generalize fold for mask-with-signbit-splat

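(iN X s>> (N-1)) & Y --> (X < 0) ? Y : 0

An arithmetic shift right by N-1 splats the sign bit across the whole
value, yielding all-ones when X is negative and zero otherwise, so the
'and' acts as a select between Y and 0.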

https://alive2.llvm.org/ce/z/qeYhdz

I was looking at a missing abs() transform and found my way to this
generalization of an existing fold that was added with D67799.
As discussed in that review, we want to make sure codegen handles
this difference well, and for all of the targets/types that I
spot-checked, it looks good.

I am leaving the existing fold in place in this commit because
it covers a potentially missing icmp fold, but I plan to remove
it in a follow-up commit, as suggested during review.

Differential Revision: https://reviews.llvm.org/D111410
This commit is contained in:
Sanjay Patel
2021-10-15 16:22:59 -04:00
parent 3b48e1170d
commit 727e642e97
5 changed files with 55 additions and 39 deletions

View File

@@ -2062,14 +2062,24 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
// and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
if (match(&I, m_c_And(m_OneUse(m_AShr(
m_NSWSub(m_Value(Y), m_Value(X)),
m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
// TODO: This is a specific case of the more general pattern below, so it
// should be removed.
unsigned FullShift = Ty->getScalarSizeInBits() - 1;
if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
m_SpecificInt(FullShift))),
m_Deferred(X)))) {
Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
}
// (iN X s>> (N-1)) & Y --> (X < 0) ? Y : 0
if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))),
m_Value(Y)))) {
Constant *Zero = ConstantInt::getNullValue(Ty);
Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
return SelectInst::Create(Cmp, Y, Zero);
}
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
if (sinkNotIntoOtherHandOfAndOrOr(I))
return &I;

View File

@@ -1403,8 +1403,8 @@ define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) {
define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @ashr_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
; CHECK-NEXT: [[NEG_OR_ZERO:%.*]] = and i8 [[SIGN]], [[Y:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG_OR_ZERO:%.*]] = select i1 [[ISNEG]], i8 [[Y:%.*]], i8 0
; CHECK-NEXT: ret i8 [[NEG_OR_ZERO]]
;
%sign = ashr i8 %x, 7
@@ -1415,8 +1415,8 @@ define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
; CHECK-LABEL: @ashr_bitwidth_mask_vec_commute(
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
; CHECK-NEXT: [[NEG_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[SIGN]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[NEG_OR_ZERO:%.*]] = select <2 x i1> [[ISNEG]], <2 x i8> [[Y]], <2 x i8> zeroinitializer
; CHECK-NEXT: ret <2 x i8> [[NEG_OR_ZERO]]
;
%y = mul <2 x i8> %py, <i8 42, i8 2> ; thwart complexity-based ordering
@@ -1425,6 +1425,8 @@ define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
ret <2 x i8> %neg_or_zero
}
; negative test - extra use
define i8 @ashr_bitwidth_mask_use(i8 %x, i8 %y) {
; CHECK-LABEL: @ashr_bitwidth_mask_use(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
@@ -1438,6 +1440,8 @@ define i8 @ashr_bitwidth_mask_use(i8 %x, i8 %y) {
ret i8 %r
}
; negative test - wrong shift amount
define i8 @ashr_not_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @ashr_not_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
@@ -1449,6 +1453,8 @@ define i8 @ashr_not_bitwidth_mask(i8 %x, i8 %y) {
ret i8 %r
}
; negative test - wrong shift opcode
define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
; CHECK-LABEL: @lshr_bitwidth_mask(
; CHECK-NEXT: [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7

View File

@@ -100,8 +100,8 @@ define <2 x i1> @test5_zero() {
define i32 @test6(i32 %a, i32 %b) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[A_LOBIT_NEG:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[F:%.*]] = and i32 [[A_LOBIT_NEG]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[F:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[F]]
;
%c = icmp sle i32 %a, -1

View File

@@ -45,8 +45,8 @@ define i32 @neg(i32 %i) {
define i32 @test10(i32 %a, i32 %b) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[E]]
;
%c = icmp slt i32 %a, 0
@@ -57,8 +57,8 @@ define i32 @test10(i32 %a, i32 %b) {
define i32 @test11(i32 %a, i32 %b) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[E]]
;
%c = icmp sle i32 %a, -1
@@ -72,8 +72,8 @@ declare void @use32(i32)
define i32 @test12(i32 %a, i32 %b) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: call void @use32(i32 [[A_LOBIT]])
; CHECK-NEXT: ret i32 [[E]]
;
@@ -310,8 +310,8 @@ define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {
define i32 @signbit_mul(i32 %a, i32 %b) {
; CHECK-LABEL: @signbit_mul(
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[E]]
;
%d = lshr i32 %a, 31
@@ -322,8 +322,8 @@ define i32 @signbit_mul(i32 %a, i32 %b) {
define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
; CHECK-LABEL: @signbit_mul_commute_extra_use(
; CHECK-NEXT: [[D:%.*]] = lshr i32 [[A:%.*]], 31
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: call void @use32(i32 [[D]])
; CHECK-NEXT: ret i32 [[E]]
;
@@ -337,8 +337,8 @@ define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @signbit_mul_vec(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[E]]
;
%d = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -348,8 +348,8 @@ define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @signbit_mul_vec_commute(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[E]]
;
%d = lshr <2 x i32> %a, <i32 31, i32 31>

View File

@@ -45,8 +45,8 @@ define i32 @neg(i32 %i) {
define i32 @test10(i32 %a, i32 %b) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[E]]
;
%c = icmp slt i32 %a, 0
@@ -57,8 +57,8 @@ define i32 @test10(i32 %a, i32 %b) {
define i32 @test11(i32 %a, i32 %b) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[E]]
;
%c = icmp sle i32 %a, -1
@@ -72,8 +72,8 @@ declare void @use32(i32)
define i32 @test12(i32 %a, i32 %b) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: call void @use32(i32 [[A_LOBIT]])
; CHECK-NEXT: ret i32 [[E]]
;
@@ -376,12 +376,12 @@ define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {
ret i32 %r
}
; (A >>u 31) * B --> (A >>s 31) & B
; (A >>u 31) * B --> (A >>s 31) & B --> A < 0 ? B : 0
define i32 @signbit_mul(i32 %a, i32 %b) {
; CHECK-LABEL: @signbit_mul(
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: ret i32 [[E]]
;
%d = lshr i32 %a, 31
@@ -392,8 +392,8 @@ define i32 @signbit_mul(i32 %a, i32 %b) {
define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
; CHECK-LABEL: @signbit_mul_commute_extra_use(
; CHECK-NEXT: [[D:%.*]] = lshr i32 [[A:%.*]], 31
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
; CHECK-NEXT: [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
; CHECK-NEXT: call void @use32(i32 [[D]])
; CHECK-NEXT: ret i32 [[E]]
;
@@ -403,12 +403,12 @@ define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
ret i32 %e
}
; (A >>u 31) * B --> (A >>s 31) & B
; (A >>u 31) * B --> (A >>s 31) & B --> A < 0 ? B : 0
define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @signbit_mul_vec(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[E]]
;
%d = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -418,8 +418,8 @@ define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @signbit_mul_vec_commute(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[E]]
;
%d = lshr <2 x i32> %a, <i32 31, i32 31>