Files
clang-p2996/llvm/test/CodeGen/ARM/combine-bswap.ll
Austin Chang d069ac035a [DAGCombiner] Add bswap(logic_op(bswap(x), y)) optimization
This is the implementation of D149782

The patch implements a helper function that matches and folds the following cases in the DAGCombiner:

1. `bswap(logic_op(x, bswap(y))) -> logic_op(bswap(x), y)`
2. `bswap(logic_op(bswap(x), y)) -> logic_op(x, bswap(y))`
3. `bswap(logic_op(bswap(x), bswap(y))) -> logic_op(x, y)` in the multi-use case, where the fold still reduces the number of instructions.

The helper function accepts an SDValue with either the BSWAP or the BITREVERSE opcode. This patch folds only the BSWAP cases and leaves the BITREVERSE optimization for future work.

Reviewed By: RKSimon, goldstein.w.n

Differential Revision: https://reviews.llvm.org/D149783
2023-05-16 18:58:07 -05:00

84 lines
2.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv7m-none-eabi -mattr=v7 | FileCheck %s --check-prefixes=CHECK
; Byte-swap and bit-reverse intrinsics called by the test functions below.
declare i32 @llvm.bswap.i32(i32) readnone
declare i64 @llvm.bswap.i64(i64) readnone
declare i32 @llvm.bitreverse.i32(i32) readnone
; Positive test: bswap(and(bswap(%a), %b)), with the inner bswap on the left
; operand of the `and`. The expected assembly has a single `rev` feeding the
; `ands` and no byte swap after the logic op.
define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_and_lhs_bs32:
; CHECK: @ %bb.0:
; CHECK-NEXT: rev r1, r1
; CHECK-NEXT: ands r0, r1
; CHECK-NEXT: bx lr
%1 = tail call i32 @llvm.bswap.i32(i32 %a)
%2 = and i32 %1, %b
%3 = tail call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3
}
; Positive test: bswap(or(%a, bswap(%b))) on i64, with the inner bswap on the
; right operand of the `or`. The expected assembly has two `rev` instructions
; followed by two ORs, and no byte swap after the ORs.
define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: bs_or_rhs_bs64:
; CHECK: @ %bb.0:
; CHECK-NEXT: rev r1, r1
; CHECK-NEXT: rev r0, r0
; CHECK-NEXT: orrs r2, r1
; CHECK-NEXT: orr.w r1, r0, r3
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bx lr
%1 = tail call i64 @llvm.bswap.i64(i64 %b)
%2 = or i64 %a, %1
%3 = tail call i64 @llvm.bswap.i64(i64 %2)
ret i64 %3
}
; Positive test: bswap(and(bswap(%a), bswap(%b))) where both inner bswaps have
; an additional use (the two multiplies). In the expected assembly the `and`
; operates directly on the unswapped r0 and r1; the two `rev` instructions
; that remain only feed the multiplies.
define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_and_all_operand_multiuse:
; CHECK: @ %bb.0:
; CHECK-NEXT: and.w r2, r0, r1
; CHECK-NEXT: rev r0, r0
; CHECK-NEXT: rev r1, r1
; CHECK-NEXT: muls r0, r2, r0
; CHECK-NEXT: muls r0, r1, r0
; CHECK-NEXT: bx lr
%1 = tail call i32 @llvm.bswap.i32(i32 %a)
%2 = tail call i32 @llvm.bswap.i32(i32 %b)
%3 = and i32 %1, %2
%4 = tail call i32 @llvm.bswap.i32(i32 %3)
%5 = mul i32 %1, %4 ;increase use of left bswap
%6 = mul i32 %2, %5 ;increase use of right bswap
ret i32 %6
}
; Negative test: the logic op (%2) has a second use in the multiply, so the
; bswap(and(bswap(%b), %a)) pattern is expected to stay unfolded. The expected
; assembly keeps a `rev` both before and after the `ands`.
define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_and_rhs_bs32_multiuse1:
; CHECK: @ %bb.0:
; CHECK-NEXT: rev r1, r1
; CHECK-NEXT: ands r0, r1
; CHECK-NEXT: rev r1, r0
; CHECK-NEXT: muls r0, r1, r0
; CHECK-NEXT: bx lr
%1 = tail call i32 @llvm.bswap.i32(i32 %b)
%2 = and i32 %1, %a
%3 = tail call i32 @llvm.bswap.i32(i32 %2)
%4 = mul i32 %2, %3 ;increase use of logical op
ret i32 %4
}
; Negative test: the inner operation is a bitreverse, not a bswap, so the
; outer bswap is not expected to be folded with it. The expected assembly
; keeps `rbit`, `eors` and the trailing `rev`.
define i32 @bs_xor_rhs_brev32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: bs_xor_rhs_brev32:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r1, r1
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: rev r0, r0
; CHECK-NEXT: bx lr
%1 = tail call i32 @llvm.bitreverse.i32(i32 %b)
%2 = xor i32 %a, %1
%3 = tail call i32 @llvm.bswap.i32(i32 %2)
ret i32 %3
}