[InstCombine] generalize fold for mask-with-signbit-splat

(iN X s>> (N-1)) & Y --> (X < 0) ? Y : 0

https://alive2.llvm.org/ce/z/qeYhdz

I was looking at a missing abs() transform and found my way to this generalization of an existing fold that was added with D67799. As discussed in that review, we want to make sure codegen handles this difference well, and for all of the targets/types that I spot-checked, it looks good.

I am leaving the existing fold in place in this commit because it covers a potentially missing icmp fold, but I plan to remove that as a follow-up commit as suggested during review.

Differential Revision: https://reviews.llvm.org/D111410
2021-10-15 16:22:59 -04:00
parent 3b48e1170d
commit 727e642e97
5 changed files with 55 additions and 39 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2062,14 +2062,24 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
    return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));

  // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
-  if (match(&I, m_c_And(m_OneUse(m_AShr(
-                            m_NSWSub(m_Value(Y), m_Value(X)),
-                            m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
+  // TODO: This is a specific case of the more general pattern below, so it
+  //       should be removed.
+  unsigned FullShift = Ty->getScalarSizeInBits() - 1;
+  if (match(&I, m_c_And(m_OneUse(m_AShr(m_NSWSub(m_Value(Y), m_Value(X)),
+                                        m_SpecificInt(FullShift))),
                        m_Deferred(X)))) {
    Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
    return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
  }

+  // (iN X s>> (N-1)) & Y --> (X < 0) ? Y : 0
+  if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))),
+                        m_Value(Y)))) {
+    Constant *Zero = ConstantInt::getNullValue(Ty);
+    Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+    return SelectInst::Create(Cmp, Y, Zero);
+  }
+
  // (~x) & y  -->  ~(x | (~y))  iff that gets rid of inversions
  if (sinkNotIntoOtherHandOfAndOrOr(I))
    return &I;
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1403,8 +1403,8 @@ define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) {

 define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_bitwidth_mask(
-; CHECK-NEXT:    [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
-; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = and i8 [[SIGN]], [[Y:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = select i1 [[ISNEG]], i8 [[Y:%.*]], i8 0
 ; CHECK-NEXT:    ret i8 [[NEG_OR_ZERO]]
 ;
  %sign = ashr i8 %x, 7
@@ -1415,8 +1415,8 @@ define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
 define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
 ; CHECK-LABEL: @ashr_bitwidth_mask_vec_commute(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
-; CHECK-NEXT:    [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
-; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[SIGN]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[NEG_OR_ZERO:%.*]] = select <2 x i1> [[ISNEG]], <2 x i8> [[Y]], <2 x i8> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i8> [[NEG_OR_ZERO]]
 ;
  %y = mul <2 x i8> %py, <i8 42, i8 2>      ; thwart complexity-based ordering
@@ -1425,6 +1425,8 @@ define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
  ret <2 x i8> %neg_or_zero
 }

+; negative test - extra use
+
 define i8 @ashr_bitwidth_mask_use(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_bitwidth_mask_use(
 ; CHECK-NEXT:    [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7
@@ -1438,6 +1440,8 @@ define i8 @ashr_bitwidth_mask_use(i8 %x, i8 %y) {
  ret i8 %r
 }

+; negative test - wrong shift amount
+
 define i8 @ashr_not_bitwidth_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_not_bitwidth_mask(
 ; CHECK-NEXT:    [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6
@@ -1449,6 +1453,8 @@ define i8 @ashr_not_bitwidth_mask(i8 %x, i8 %y) {
  ret i8 %r
 }

+; negative test - wrong shift opcode
+
 define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_bitwidth_mask(
 ; CHECK-NEXT:    [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -100,8 +100,8 @@ define <2 x i1> @test5_zero() {

 define i32 @test6(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[A_LOBIT_NEG:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[F:%.*]] = and i32 [[A_LOBIT_NEG]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[F:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[F]]
 ;
  %c = icmp sle i32 %a, -1
--- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -45,8 +45,8 @@ define i32 @neg(i32 %i) {

 define i32 @test10(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
  %c = icmp slt i32 %a, 0
@@ -57,8 +57,8 @@ define i32 @test10(i32 %a, i32 %b) {

 define i32 @test11(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
  %c = icmp sle i32 %a, -1
@@ -72,8 +72,8 @@ declare void @use32(i32)
 define i32 @test12(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[A_LOBIT]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -310,8 +310,8 @@ define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {

 define i32 @signbit_mul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
  %d = lshr i32 %a, 31
@@ -322,8 +322,8 @@ define i32 @signbit_mul(i32 %a, i32 %b) {
 define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul_commute_extra_use(
 ; CHECK-NEXT:    [[D:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[D]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -337,8 +337,8 @@ define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {

 define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
  %d = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -348,8 +348,8 @@ define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {

 define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
  %d = lshr <2 x i32> %a, <i32 31, i32 31>
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -45,8 +45,8 @@ define i32 @neg(i32 %i) {

 define i32 @test10(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
  %c = icmp slt i32 %a, 0
@@ -57,8 +57,8 @@ define i32 @test10(i32 %a, i32 %b) {

 define i32 @test11(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
  %c = icmp sle i32 %a, -1
@@ -72,8 +72,8 @@ declare void @use32(i32)
 define i32 @test12(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[A_LOBIT]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -376,12 +376,12 @@ define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {
  ret i32 %r
 }

-; (A >>u 31) * B --> (A >>s 31) & B
+; (A >>u 31) * B --> (A >>s 31) & B --> A < 0 ? B : 0

 define i32 @signbit_mul(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
  %d = lshr i32 %a, 31
@@ -392,8 +392,8 @@ define i32 @signbit_mul(i32 %a, i32 %b) {
 define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
 ; CHECK-LABEL: @signbit_mul_commute_extra_use(
 ; CHECK-NEXT:    [[D:%.*]] = lshr i32 [[A:%.*]], 31
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[A]], 31
-; CHECK-NEXT:    [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[E:%.*]] = select i1 [[ISNEG]], i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[D]])
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -403,12 +403,12 @@ define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
  ret i32 %e
 }

-; (A >>u 31)) * B --> (A >>s 31) & B
+; (A >>u 31) * B --> (A >>s 31) & B --> A < 0 ? B : 0

 define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
  %d = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -418,8 +418,8 @@ define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {

 define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @signbit_mul_vec_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[E:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> [[B:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
  %d = lshr <2 x i32> %a, <i32 31, i32 31>