Files
clang-p2996/llvm/test/CodeGen/ARM/and-cmp0-sink.ll
Momchil Velikov 6602110152 [ARM] Enable and/cmp0 folding
The `CodeGenPrepare` pass can sink a bitwise `and` that is used by a
compare against zero into the basic blocks where its users are. This
operation is guarded by a lowering hook, which is disabled for ARM. In
ARM architecture versions from v7-M up, these two operations can be
folded into a single `tst rN, #imm` instruction. Sinking the `and` can
also enable the cmov-to-bfi DAG combiner.

This patch fixes some benchmark regressions caused
by https://reviews.llvm.org/D129370, as well as scoring slightly better overall.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D134360
2022-09-26 11:31:23 +01:00

397 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
; Module data layout: little-endian ("e"), 32-bit pointers with 32-bit
; alignment ("p:32:32"), and a 32-bit native integer width ("n32").
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
; Test sinking of `and` instructions so they can fold into `tst` or `lsls`, enable the cmov-bfi combine, etc.
; @f(%v, %outp): stores 0 to *%outp, then, unless (%v & 14) == 0, overwrites
; it with a constant selected by bits 0, 1 and 2 of %v.
;
; Every `and`/`icmp eq ..., 0` pair is written in the entry block, but three
; of the four compare results are only consumed in %if.then / %if.else.
; In the thumbv7m, armv7a and thumbv7a expectations below, the (%v & 14) test
; shows up as a `tst` against #14 inside both %if.then and %if.else; in the
; armv6m expectations the masks are computed with `ands` in the entry block.
define void @f(i32 %v, ptr noalias %outp) {
; V7M-LABEL: f:
; V7M: @ %bb.0: @ %entry
; V7M-NEXT: movs r2, #0
; V7M-NEXT: str r2, [r1]
; V7M-NEXT: lsls r2, r0, #31
; V7M-NEXT: bne .LBB0_3
; V7M-NEXT: @ %bb.1: @ %if.then
; V7M-NEXT: tst.w r0, #14
; V7M-NEXT: beq .LBB0_6
; V7M-NEXT: @ %bb.2:
; V7M-NEXT: lsls r2, r0, #30
; V7M-NEXT: mov.w r3, #33024
; V7M-NEXT: and.w r2, r3, r2, asr #31
; V7M-NEXT: lsrs r0, r0, #2
; V7M-NEXT: bfi r2, r0, #7, #1
; V7M-NEXT: bfi r2, r0, #14, #1
; V7M-NEXT: b .LBB0_5
; V7M-NEXT: .LBB0_3: @ %if.else
; V7M-NEXT: tst.w r0, #14
; V7M-NEXT: it eq
; V7M-NEXT: bxeq lr
; V7M-NEXT: .LBB0_4:
; V7M-NEXT: lsls r2, r0, #30
; V7M-NEXT: mov.w r3, #8256
; V7M-NEXT: and.w r2, r3, r2, asr #31
; V7M-NEXT: lsrs r0, r0, #2
; V7M-NEXT: bfi r2, r0, #5, #1
; V7M-NEXT: bfi r2, r0, #12, #1
; V7M-NEXT: .LBB0_5: @ %if.end
; V7M-NEXT: str r2, [r1]
; V7M-NEXT: .LBB0_6: @ %exit
; V7M-NEXT: bx lr
;
; V7A-LABEL: f:
; V7A: @ %bb.0: @ %entry
; V7A-NEXT: mov r2, #0
; V7A-NEXT: tst r0, #1
; V7A-NEXT: str r2, [r1]
; V7A-NEXT: bne .LBB0_3
; V7A-NEXT: @ %bb.1: @ %if.then
; V7A-NEXT: tst r0, #14
; V7A-NEXT: beq .LBB0_6
; V7A-NEXT: @ %bb.2:
; V7A-NEXT: lsl r2, r0, #30
; V7A-NEXT: mov r3, #33024
; V7A-NEXT: and r2, r3, r2, asr #31
; V7A-NEXT: lsr r0, r0, #2
; V7A-NEXT: bfi r2, r0, #7, #1
; V7A-NEXT: bfi r2, r0, #14, #1
; V7A-NEXT: b .LBB0_5
; V7A-NEXT: .LBB0_3: @ %if.else
; V7A-NEXT: tst r0, #14
; V7A-NEXT: bxeq lr
; V7A-NEXT: .LBB0_4:
; V7A-NEXT: lsl r2, r0, #30
; V7A-NEXT: mov r3, #8256
; V7A-NEXT: and r2, r3, r2, asr #31
; V7A-NEXT: lsr r0, r0, #2
; V7A-NEXT: bfi r2, r0, #5, #1
; V7A-NEXT: bfi r2, r0, #12, #1
; V7A-NEXT: .LBB0_5: @ %if.end
; V7A-NEXT: str r2, [r1]
; V7A-NEXT: .LBB0_6: @ %exit
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: f:
; V7A-T: @ %bb.0: @ %entry
; V7A-T-NEXT: movs r2, #0
; V7A-T-NEXT: str r2, [r1]
; V7A-T-NEXT: lsls r2, r0, #31
; V7A-T-NEXT: bne .LBB0_3
; V7A-T-NEXT: @ %bb.1: @ %if.then
; V7A-T-NEXT: tst.w r0, #14
; V7A-T-NEXT: beq .LBB0_6
; V7A-T-NEXT: @ %bb.2:
; V7A-T-NEXT: lsls r2, r0, #30
; V7A-T-NEXT: mov.w r3, #33024
; V7A-T-NEXT: and.w r2, r3, r2, asr #31
; V7A-T-NEXT: lsrs r0, r0, #2
; V7A-T-NEXT: bfi r2, r0, #7, #1
; V7A-T-NEXT: bfi r2, r0, #14, #1
; V7A-T-NEXT: b .LBB0_5
; V7A-T-NEXT: .LBB0_3: @ %if.else
; V7A-T-NEXT: tst.w r0, #14
; V7A-T-NEXT: it eq
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: .LBB0_4:
; V7A-T-NEXT: lsls r2, r0, #30
; V7A-T-NEXT: mov.w r3, #8256
; V7A-T-NEXT: and.w r2, r3, r2, asr #31
; V7A-T-NEXT: lsrs r0, r0, #2
; V7A-T-NEXT: bfi r2, r0, #5, #1
; V7A-T-NEXT: bfi r2, r0, #12, #1
; V7A-T-NEXT: .LBB0_5: @ %if.end
; V7A-T-NEXT: str r2, [r1]
; V7A-T-NEXT: .LBB0_6: @ %exit
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: f:
; V6M: @ %bb.0: @ %entry
; V6M-NEXT: .save {r4, lr}
; V6M-NEXT: push {r4, lr}
; V6M-NEXT: movs r2, #0
; V6M-NEXT: str r2, [r1]
; V6M-NEXT: movs r3, #14
; V6M-NEXT: ands r3, r0
; V6M-NEXT: movs r4, #4
; V6M-NEXT: ands r4, r0
; V6M-NEXT: movs r2, #2
; V6M-NEXT: ands r2, r0
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: bne .LBB0_5
; V6M-NEXT: @ %bb.1: @ %if.then
; V6M-NEXT: movs r0, #129
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: beq .LBB0_3
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: lsls r2, r0, #8
; V6M-NEXT: .LBB0_3: @ %if.then
; V6M-NEXT: cmp r4, #0
; V6M-NEXT: beq .LBB0_10
; V6M-NEXT: @ %bb.4: @ %if.then
; V6M-NEXT: lsls r0, r0, #7
; V6M-NEXT: b .LBB0_9
; V6M-NEXT: .LBB0_5: @ %if.else
; V6M-NEXT: movs r0, #129
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: beq .LBB0_7
; V6M-NEXT: @ %bb.6:
; V6M-NEXT: lsls r2, r0, #6
; V6M-NEXT: .LBB0_7: @ %if.else
; V6M-NEXT: cmp r4, #0
; V6M-NEXT: beq .LBB0_10
; V6M-NEXT: @ %bb.8: @ %if.else
; V6M-NEXT: lsls r0, r0, #5
; V6M-NEXT: .LBB0_9: @ %if.else
; V6M-NEXT: adds r2, r2, r0
; V6M-NEXT: .LBB0_10: @ %if.else
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: beq .LBB0_12
; V6M-NEXT: @ %bb.11: @ %if.end
; V6M-NEXT: str r2, [r1]
; V6M-NEXT: .LBB0_12: @ %exit
; V6M-NEXT: pop {r4, pc}
entry:
; Clear *%outp, then compute every mask test up front. Only %cmp (bit 0) is
; consumed in this block; %tobool.not (bit 1), %tobool1.not (bit 2) and
; %tobool2.not (bits 1-3, mask 14) are consumed in %if.then / %if.else.
store i32 0, ptr %outp, align 4
%and = and i32 %v, 1
%cmp = icmp eq i32 %and, 0
%and1 = and i32 %v, 2
%tobool.not = icmp eq i32 %and1, 0
%and2 = and i32 %v, 4
%tobool1.not = icmp eq i32 %and2, 0
%and3 = and i32 %v, 14
%tobool2.not = icmp eq i32 %and3, 0
br i1 %cmp, label %if.then, label %if.else
if.then:
; Bit 0 of %v is clear. Start from 0 or 33024 (0x8100) depending on bit 1,
; then OR in 16512 (0x4080) when bit 2 is set. The store in %if.end is
; skipped entirely when (%v & 14) == 0.
%select = select i1 %tobool.not, i32 0, i32 33024
%or = or i32 %select, 16512
%spec.select = select i1 %tobool1.not, i32 %select, i32 %or
br i1 %tobool2.not, label %exit, label %if.end
if.else:
; Bit 0 of %v is set. Same shape as %if.then, using 8256 (0x2040) and
; 4128 (0x1020) as the constants.
%select1 = select i1 %tobool.not, i32 0, i32 8256
%or1 = or i32 %select1, 4128
%spec.select1 = select i1 %tobool1.not, i32 %select1, i32 %or1
br i1 %tobool2.not, label %exit, label %if.end
if.end:
; Write back whichever value the taken branch produced.
%spec.select.sink = phi i32 [ %spec.select, %if.then ], [ %spec.select1, %if.else ]
store i32 %spec.select.sink, ptr %outp, align 4
br label %exit
exit:
ret void
}
; Test with a mask that can be encoded with the T32 instruction set, but not with A32.
; @f0(%c0, %v): returns 0 or 1 based on %c0 and whether
; (%v & 16843009) is zero; 16843009 is 0x01010101.
;   %c0 true : 0 when the masked value is zero, otherwise 1.
;   %c0 false: 1 when the masked value is zero, otherwise 0.
;
; The `and` is written once in %E while its two compares live in %A and %B.
; In the thumbv7m and thumbv7a expectations below, each of %A and %B has its
; own `tst.w` against #16843009. In the armv7a expectations the mask is built
; with movw/movt and applied with `and` in %E, followed by `cmp r1, #0` in
; %A and %B. In the armv6m expectations the mask is loaded from a constant
; pool and applied with `ands` in %E.
define i32 @f0(i1 %c0, i32 %v) {
; V7M-LABEL: f0:
; V7M: @ %bb.0: @ %E
; V7M-NEXT: lsls r0, r0, #31
; V7M-NEXT: beq .LBB1_2
; V7M-NEXT: @ %bb.1: @ %A
; V7M-NEXT: tst.w r1, #16843009
; V7M-NEXT: itt eq
; V7M-NEXT: moveq r0, #0
; V7M-NEXT: bxeq lr
; V7M-NEXT: b .LBB1_3
; V7M-NEXT: .LBB1_2: @ %B
; V7M-NEXT: tst.w r1, #16843009
; V7M-NEXT: itt ne
; V7M-NEXT: movne r0, #0
; V7M-NEXT: bxne lr
; V7M-NEXT: .LBB1_3: @ %D
; V7M-NEXT: movs r0, #1
; V7M-NEXT: bx lr
;
; V7A-LABEL: f0:
; V7A: @ %bb.0: @ %E
; V7A-NEXT: movw r2, #257
; V7A-NEXT: tst r0, #1
; V7A-NEXT: movt r2, #257
; V7A-NEXT: and r1, r1, r2
; V7A-NEXT: beq .LBB1_3
; V7A-NEXT: @ %bb.1: @ %A
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: moveq r0, #0
; V7A-NEXT: bxeq lr
; V7A-NEXT: .LBB1_2: @ %D
; V7A-NEXT: mov r0, #1
; V7A-NEXT: bx lr
; V7A-NEXT: .LBB1_3: @ %B
; V7A-NEXT: mov r0, #0
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: moveq r0, #1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: f0:
; V7A-T: @ %bb.0: @ %E
; V7A-T-NEXT: lsls r0, r0, #31
; V7A-T-NEXT: beq .LBB1_2
; V7A-T-NEXT: @ %bb.1: @ %A
; V7A-T-NEXT: tst.w r1, #16843009
; V7A-T-NEXT: itt eq
; V7A-T-NEXT: moveq r0, #0
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: b .LBB1_3
; V7A-T-NEXT: .LBB1_2: @ %B
; V7A-T-NEXT: tst.w r1, #16843009
; V7A-T-NEXT: itt ne
; V7A-T-NEXT: movne r0, #0
; V7A-T-NEXT: bxne lr
; V7A-T-NEXT: .LBB1_3: @ %D
; V7A-T-NEXT: movs r0, #1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: f0:
; V6M: @ %bb.0: @ %E
; V6M-NEXT: ldr r2, .LCPI1_0
; V6M-NEXT: ands r2, r1
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: beq .LBB1_3
; V6M-NEXT: @ %bb.1: @ %A
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: bne .LBB1_5
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB1_3: @ %B
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: beq .LBB1_5
; V6M-NEXT: @ %bb.4:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB1_5: @ %D
; V6M-NEXT: movs r0, #1
; V6M-NEXT: bx lr
; V6M-NEXT: .p2align 2
; V6M-NEXT: @ %bb.6:
; V6M-NEXT: .LCPI1_0:
; V6M-NEXT: .long 16843009 @ 0x1010101
E:
; The mask is applied here; both compares of %a are in the successor blocks.
%a = and i32 %v, 16843009
br i1 %c0, label %A, label %B
A:
; %c0 true: masked value zero -> %C (result 0), non-zero -> %D (result 1).
%c1 = icmp eq i32 %a, 0
br i1 %c1, label %C, label %D
B:
; %c0 false: same compare with the successors swapped relative to %A.
%c2 = icmp eq i32 %a, 0
br i1 %c2, label %D, label %C
C:
br label %X
D:
br label %X
X:
; Reaching %X through %C yields 0, through %D yields 1.
%x = phi i32 [0, %C], [1, %D]
ret i32 %x
}
; Test with a mask that can be encoded with both the T32 and A32 instruction sets.
; @f1(%c0, %v): same control flow as a two-way test on one masked value:
; returns 0 or 1 based on %c0 and whether (%v & 100663296) is zero;
; 100663296 is 0x06000000 (3 << 25).
;   %c0 true : 0 when the masked value is zero, otherwise 1.
;   %c0 false: 1 when the masked value is zero, otherwise 0.
;
; The `and` is written once in %E while its two compares live in %A and %B.
; In the thumbv7m, armv7a and thumbv7a expectations below, each of %A and %B
; has its own `tst` against #100663296. In the armv6m expectations the mask
; is built with `movs #3` / `lsls #25` and applied with `ands` in %E.
define i32 @f1(i1 %c0, i32 %v) {
; V7M-LABEL: f1:
; V7M: @ %bb.0: @ %E
; V7M-NEXT: lsls r0, r0, #31
; V7M-NEXT: beq .LBB2_2
; V7M-NEXT: @ %bb.1: @ %A
; V7M-NEXT: tst.w r1, #100663296
; V7M-NEXT: itt eq
; V7M-NEXT: moveq r0, #0
; V7M-NEXT: bxeq lr
; V7M-NEXT: b .LBB2_3
; V7M-NEXT: .LBB2_2: @ %B
; V7M-NEXT: tst.w r1, #100663296
; V7M-NEXT: itt ne
; V7M-NEXT: movne r0, #0
; V7M-NEXT: bxne lr
; V7M-NEXT: .LBB2_3: @ %D
; V7M-NEXT: movs r0, #1
; V7M-NEXT: bx lr
;
; V7A-LABEL: f1:
; V7A: @ %bb.0: @ %E
; V7A-NEXT: tst r0, #1
; V7A-NEXT: beq .LBB2_3
; V7A-NEXT: @ %bb.1: @ %A
; V7A-NEXT: tst r1, #100663296
; V7A-NEXT: moveq r0, #0
; V7A-NEXT: bxeq lr
; V7A-NEXT: .LBB2_2: @ %D
; V7A-NEXT: mov r0, #1
; V7A-NEXT: bx lr
; V7A-NEXT: .LBB2_3: @ %B
; V7A-NEXT: mov r0, #0
; V7A-NEXT: tst r1, #100663296
; V7A-NEXT: moveq r0, #1
; V7A-NEXT: bx lr
;
; V7A-T-LABEL: f1:
; V7A-T: @ %bb.0: @ %E
; V7A-T-NEXT: lsls r0, r0, #31
; V7A-T-NEXT: beq .LBB2_2
; V7A-T-NEXT: @ %bb.1: @ %A
; V7A-T-NEXT: tst.w r1, #100663296
; V7A-T-NEXT: itt eq
; V7A-T-NEXT: moveq r0, #0
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: b .LBB2_3
; V7A-T-NEXT: .LBB2_2: @ %B
; V7A-T-NEXT: tst.w r1, #100663296
; V7A-T-NEXT: itt ne
; V7A-T-NEXT: movne r0, #0
; V7A-T-NEXT: bxne lr
; V7A-T-NEXT: .LBB2_3: @ %D
; V7A-T-NEXT: movs r0, #1
; V7A-T-NEXT: bx lr
;
; V6M-LABEL: f1:
; V6M: @ %bb.0: @ %E
; V6M-NEXT: movs r2, #3
; V6M-NEXT: lsls r2, r2, #25
; V6M-NEXT: ands r2, r1
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: beq .LBB2_3
; V6M-NEXT: @ %bb.1: @ %A
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: bne .LBB2_5
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB2_3: @ %B
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: beq .LBB2_5
; V6M-NEXT: @ %bb.4:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB2_5: @ %D
; V6M-NEXT: movs r0, #1
; V6M-NEXT: bx lr
E:
; The mask is applied here; both compares of %a are in the successor blocks.
%a = and i32 %v, 100663296
br i1 %c0, label %A, label %B
A:
; %c0 true: masked value zero -> %C (result 0), non-zero -> %D (result 1).
%c1 = icmp eq i32 %a, 0
br i1 %c1, label %C, label %D
B:
; %c0 false: same compare with the successors swapped relative to %A.
%c2 = icmp eq i32 %a, 0
br i1 %c2, label %D, label %C
C:
br label %X
D:
br label %X
X:
; Reaching %X through %C yields 0, through %D yields 1.
%x = phi i32 [0, %C], [1, %D]
ret i32 %x
}