[InstCombine] try to canonicalize logical shift after bswap
When shifting by a byte-multiple:
  bswap (shl X, C) --> lshr (bswap X), C
  bswap (lshr X, C) --> shl (bswap X), C

This is an IR implementation of a transform suggested in D120648. The "swaps cancel" test models the motivating optimization from that proposal.

Alive2 checks (as noted in the other review, we could use knownbits to handle shift-by-variable-amount, but that can be an enhancement patch):
https://alive2.llvm.org/ce/z/pXUaRf
https://alive2.llvm.org/ce/z/ZnaMLf

Differential Revision: https://reviews.llvm.org/D122010
This commit is contained in:
@@ -1349,6 +1349,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
|
||||
Value *IIOperand = II->getArgOperand(0);
|
||||
Value *X = nullptr;
|
||||
|
||||
// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
|
||||
// inverse-shift-of-bswap:
|
||||
// bswap (shl X, C) --> lshr (bswap X), C
|
||||
// bswap (lshr X, C) --> shl (bswap X), C
|
||||
// TODO: Use knownbits to allow variable shift and non-splat vector match.
|
||||
BinaryOperator *BO;
|
||||
if (match(IIOperand, m_OneUse(m_BinOp(BO)))) {
|
||||
const APInt *C;
|
||||
if (match(BO, m_LogicalShift(m_Value(X), m_APIntAllowUndef(C))) &&
|
||||
(*C & 7) == 0) {
|
||||
Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
|
||||
BinaryOperator::BinaryOps InverseShift =
|
||||
BO->getOpcode() == Instruction::Shl ? Instruction::LShr
|
||||
: Instruction::Shl;
|
||||
return BinaryOperator::Create(InverseShift, NewSwap, BO->getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
KnownBits Known = computeKnownBits(IIOperand, 0, II);
|
||||
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
|
||||
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
|
||||
|
||||
@@ -26,8 +26,8 @@ define i32 @test6(i32 %a) {
|
||||
|
||||
define i32 @lshr8_i32(i32 %x) {
|
||||
; CHECK-LABEL: @lshr8_i32(
|
||||
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 8
|
||||
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[S]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = shl i32 [[TMP1]], 8
|
||||
; CHECK-NEXT: ret i32 [[R]]
|
||||
;
|
||||
%s = lshr i32 %x, 8
|
||||
@@ -37,8 +37,8 @@ define i32 @lshr8_i32(i32 %x) {
|
||||
|
||||
define <2 x i32> @lshr16_v2i32(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @lshr16_v2i32(
|
||||
; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 16, i32 16>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[S]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[TMP1]], <i32 16, i32 16>
|
||||
; CHECK-NEXT: ret <2 x i32> [[R]]
|
||||
;
|
||||
%s = lshr <2 x i32> %x, <i32 16, i32 16>
|
||||
@@ -48,14 +48,16 @@ define <2 x i32> @lshr16_v2i32(<2 x i32> %x) {
|
||||
|
||||
define i32 @lshr24_i32(i32 %x) {
|
||||
; CHECK-LABEL: @lshr24_i32(
|
||||
; CHECK-NEXT: [[S:%.*]] = and i32 [[X:%.*]], -16777216
|
||||
; CHECK-NEXT: ret i32 [[S]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -16777216
|
||||
; CHECK-NEXT: ret i32 [[TMP1]]
|
||||
;
|
||||
%s = lshr i32 %x, 24
|
||||
%r = call i32 @llvm.bswap.i32(i32 %s)
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; negative test - need shift-by-8-bit-multiple
|
||||
|
||||
define i32 @lshr12_i32(i32 %x) {
|
||||
; CHECK-LABEL: @lshr12_i32(
|
||||
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 12
|
||||
@@ -67,6 +69,8 @@ define i32 @lshr12_i32(i32 %x) {
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; negative test - uses
|
||||
|
||||
define i32 @lshr8_i32_use(i32 %x, i32* %p) {
|
||||
; CHECK-LABEL: @lshr8_i32_use(
|
||||
; CHECK-NEXT: [[S:%.*]] = lshr i32 [[X:%.*]], 12
|
||||
@@ -82,8 +86,8 @@ define i32 @lshr8_i32_use(i32 %x, i32* %p) {
|
||||
|
||||
define i64 @shl16_i64(i64 %x) {
|
||||
; CHECK-LABEL: @shl16_i64(
|
||||
; CHECK-NEXT: [[S:%.*]] = shl i64 [[X:%.*]], 16
|
||||
; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.bswap.i64(i64 [[S]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[X:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = lshr i64 [[TMP1]], 16
|
||||
; CHECK-NEXT: ret i64 [[R]]
|
||||
;
|
||||
%s = shl i64 %x, 16
|
||||
@@ -91,10 +95,12 @@ define i64 @shl16_i64(i64 %x) {
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; poison vector element propagates
|
||||
|
||||
define <2 x i64> @shl16_v2i64(<2 x i64> %x) {
|
||||
; CHECK-LABEL: @shl16_v2i64(
|
||||
; CHECK-NEXT: [[S:%.*]] = shl <2 x i64> [[X:%.*]], <i64 poison, i64 24>
|
||||
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[S]])
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X:%.*]])
|
||||
; CHECK-NEXT: [[R:%.*]] = lshr <2 x i64> [[TMP1]], <i64 poison, i64 24>
|
||||
; CHECK-NEXT: ret <2 x i64> [[R]]
|
||||
;
|
||||
%s = shl <2 x i64> %x, <i64 poison, i64 24>
|
||||
@@ -104,14 +110,16 @@ define <2 x i64> @shl16_v2i64(<2 x i64> %x) {
|
||||
|
||||
define i64 @shl56_i64(i64 %x) {
|
||||
; CHECK-LABEL: @shl56_i64(
|
||||
; CHECK-NEXT: [[S:%.*]] = and i64 [[X:%.*]], 255
|
||||
; CHECK-NEXT: ret i64 [[S]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 255
|
||||
; CHECK-NEXT: ret i64 [[TMP1]]
|
||||
;
|
||||
%s = shl i64 %x, 56
|
||||
%r = call i64 @llvm.bswap.i64(i64 %s)
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; negative test - need shift-by-8-bit-multiple
|
||||
|
||||
define i64 @shl42_i64(i64 %x) {
|
||||
; CHECK-LABEL: @shl42_i64(
|
||||
; CHECK-NEXT: [[S:%.*]] = shl i64 [[X:%.*]], 42
|
||||
@@ -123,6 +131,8 @@ define i64 @shl42_i64(i64 %x) {
|
||||
ret i64 %r
|
||||
}
|
||||
|
||||
; negative test - uses
|
||||
|
||||
define i32 @shl8_i32_use(i32 %x, i32* %p) {
|
||||
; CHECK-LABEL: @shl8_i32_use(
|
||||
; CHECK-NEXT: [[S:%.*]] = shl i32 [[X:%.*]], 8
|
||||
@@ -136,11 +146,11 @@ define i32 @shl8_i32_use(i32 %x, i32* %p) {
|
||||
ret i32 %r
|
||||
}
|
||||
|
||||
; swaps cancel
|
||||
|
||||
define i64 @swap_shl16_i64(i64 %x) {
|
||||
; CHECK-LABEL: @swap_shl16_i64(
|
||||
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.bswap.i64(i64 [[X:%.*]])
|
||||
; CHECK-NEXT: [[S:%.*]] = shl i64 [[B]], 16
|
||||
; CHECK-NEXT: [[R:%.*]] = call i64 @llvm.bswap.i64(i64 [[S]])
|
||||
; CHECK-NEXT: [[R:%.*]] = lshr i64 [[X:%.*]], 16
|
||||
; CHECK-NEXT: ret i64 [[R]]
|
||||
;
|
||||
%b = call i64 @llvm.bswap.i64(i64 %x)
|
||||
@@ -536,11 +546,11 @@ define <2 x i64> @bs_active_high_different_negative(<2 x i64> %0) {
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
|
||||
; negative test
|
||||
; TODO: This should fold to 'and'.
|
||||
define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
|
||||
; CHECK-LABEL: @bs_active_high_undef(
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 undef>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP0:%.*]])
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP2]], <i64 56, i64 undef>
|
||||
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
|
||||
;
|
||||
%2 = shl <2 x i64> %0, <i64 56, i64 undef>
|
||||
|
||||
Reference in New Issue
Block a user