This is the implementation of D149782. The patch implements a helper function that matches and folds the following cases in the DAGCombiner: 1. `bswap(logic_op(x, bswap(y))) -> logic_op(bswap(x), y)` 2. `bswap(logic_op(bswap(x), y)) -> logic_op(x, bswap(y))` 3. `bswap(logic_op(bswap(x), bswap(y))) -> logic_op(x, y)` in the multi-use case, which still reduces the number of instructions. The helper function accepts an SDValue with either the BSWAP or the BITREVERSE opcode. This patch folds only the BSWAP cases and leaves the BITREVERSE optimization for future work. Reviewed By: RKSimon, goldstein.w.n. Differential Revision: https://reviews.llvm.org/D149783
84 lines
2.4 KiB
LLVM
84 lines
2.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mattr=v7 | FileCheck %s --check-prefixes=CHECK
|
|
|
|
declare i32 @llvm.bswap.i32(i32) readnone
|
|
declare i64 @llvm.bswap.i64(i64) readnone
|
|
declare i32 @llvm.bitreverse.i32(i32) readnone
|
|
|
|
; Pattern under test: bswap(and(bswap(%a), %b)), with the inner bswap on the
; left-hand operand of the logic op. The expected output contains a single
; rev followed by the ands, i.e. only one byte swap remains.
define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_and_lhs_bs32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    rev r1, r1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
  %a.swapped = tail call i32 @llvm.bswap.i32(i32 %a)
  %masked = and i32 %a.swapped, %b
  %result = tail call i32 @llvm.bswap.i32(i32 %masked)
  ret i32 %result
}
|
|
|
|
; Pattern under test: bswap(or(%a, bswap(%b))) on i64, with the inner bswap on
; the right-hand operand of the logic op. The expected output contains two rev
; instructions followed by two orr instructions and a register move.
define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: bs_or_rhs_bs64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    rev r1, r1
; CHECK-NEXT:    rev r0, r0
; CHECK-NEXT:    orrs r2, r1
; CHECK-NEXT:    orr.w r1, r0, r3
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bx lr
  %b.swapped = tail call i64 @llvm.bswap.i64(i64 %b)
  %merged = or i64 %a, %b.swapped
  %result = tail call i64 @llvm.bswap.i64(i64 %merged)
  ret i64 %result
}
|
|
|
|
; Pattern under test: bswap(and(bswap(%a), bswap(%b))) where both inner bswaps
; have an additional user (the two multiplies below). The expected output
; computes the and directly on the incoming registers and keeps one rev per
; operand for the multiplies.
define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_and_all_operand_multiuse:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    and.w r2, r0, r1
; CHECK-NEXT:    rev r0, r0
; CHECK-NEXT:    rev r1, r1
; CHECK-NEXT:    muls r0, r2, r0
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
  %a.swapped = tail call i32 @llvm.bswap.i32(i32 %a)
  %b.swapped = tail call i32 @llvm.bswap.i32(i32 %b)
  %masked = and i32 %a.swapped, %b.swapped
  %masked.swapped = tail call i32 @llvm.bswap.i32(i32 %masked)
  %prod.left = mul i32 %a.swapped, %masked.swapped ; second use of the left bswap
  %prod.both = mul i32 %b.swapped, %prod.left ; second use of the right bswap
  ret i32 %prod.both
}
|
|
|
|
; Negative test: the result of the `and` has a second user (the mul), and the
; expected output still performs a rev on the and result instead of folding
; the outer bswap away.
define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_and_rhs_bs32_multiuse1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    rev r1, r1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    rev r1, r0
; CHECK-NEXT:    muls r0, r1, r0
; CHECK-NEXT:    bx lr
  %b.swapped = tail call i32 @llvm.bswap.i32(i32 %b)
  %masked = and i32 %b.swapped, %a
  %masked.swapped = tail call i32 @llvm.bswap.i32(i32 %masked)
  %prod = mul i32 %masked, %masked.swapped ; second use of the logic op
  ret i32 %prod
}
|
|
|
|
; Negative test: the operand of the xor is a bitreverse, not a bswap, so the
; expected output keeps all three operations (rbit, eors, rev).
define i32 @bs_xor_rhs_brev32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_xor_rhs_brev32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    rev r0, r0
; CHECK-NEXT:    bx lr
  %b.reversed = tail call i32 @llvm.bitreverse.i32(i32 %b)
  %mixed = xor i32 %a, %b.reversed
  %result = tail call i32 @llvm.bswap.i32(i32 %mixed)
  ret i32 %result
}
|
|
|