The `CodeGenPrepare` pass can sink a bitwise `and` that is used by a compare-with-zero into the basic blocks where its users are. This transformation is guarded by a lowering hook, which was disabled for ARM. In ARM architecture versions from v7-M up, these two operations can be folded into a single `tst rN, #imm` instruction. Sinking the `and` can also enable the cmov-to-bfi DAG combiner. This patch fixes some benchmark regressions caused by https://reviews.llvm.org/D129370, as well as scoring slightly better overall. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D134360
397 lines
10 KiB
LLVM
397 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s --check-prefix V7M
|
|
; RUN: llc -mtriple=armv7a-eabi %s -o - | FileCheck %s --check-prefix V7A
|
|
; RUN: llc -mtriple=thumbv7a-eabi %s -o - | FileCheck %s --check-prefix V7A-T
|
|
; RUN: llc -mtriple=armv6m-eabi %s -o - | FileCheck %s --check-prefix V6M
|
|
|
|
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
|
|
; Test sinking of `and` instructions so that they fold into `tst`/`lsls`, enable the cmov-to-bfi combine, etc.
|
|
define void @f(i32 %v, ptr noalias %outp) {
; Purpose: all four `and` + `icmp eq ..., 0` pairs are computed in %entry, but
; three of the compare results (%tobool.not, %tobool1.not, %tobool2.not) are
; consumed only in %if.then / %if.else.
; In the expectations below, the thumbv7m, armv7a and thumbv7a outputs test
; mask #14 with `tst` inside both branch blocks and build the stored value
; with `bfi`; the armv6m output keeps every `ands` in the entry block.
|
|
; V7M-LABEL: f:
|
|
; V7M: @ %bb.0: @ %entry
|
|
; V7M-NEXT: movs r2, #0
|
|
; V7M-NEXT: str r2, [r1]
|
|
; V7M-NEXT: lsls r2, r0, #31
|
|
; V7M-NEXT: bne .LBB0_3
|
|
; V7M-NEXT: @ %bb.1: @ %if.then
|
|
; V7M-NEXT: tst.w r0, #14
|
|
; V7M-NEXT: beq .LBB0_6
|
|
; V7M-NEXT: @ %bb.2:
|
|
; V7M-NEXT: lsls r2, r0, #30
|
|
; V7M-NEXT: mov.w r3, #33024
|
|
; V7M-NEXT: and.w r2, r3, r2, asr #31
|
|
; V7M-NEXT: lsrs r0, r0, #2
|
|
; V7M-NEXT: bfi r2, r0, #7, #1
|
|
; V7M-NEXT: bfi r2, r0, #14, #1
|
|
; V7M-NEXT: b .LBB0_5
|
|
; V7M-NEXT: .LBB0_3: @ %if.else
|
|
; V7M-NEXT: tst.w r0, #14
|
|
; V7M-NEXT: it eq
|
|
; V7M-NEXT: bxeq lr
|
|
; V7M-NEXT: .LBB0_4:
|
|
; V7M-NEXT: lsls r2, r0, #30
|
|
; V7M-NEXT: mov.w r3, #8256
|
|
; V7M-NEXT: and.w r2, r3, r2, asr #31
|
|
; V7M-NEXT: lsrs r0, r0, #2
|
|
; V7M-NEXT: bfi r2, r0, #5, #1
|
|
; V7M-NEXT: bfi r2, r0, #12, #1
|
|
; V7M-NEXT: .LBB0_5: @ %if.end
|
|
; V7M-NEXT: str r2, [r1]
|
|
; V7M-NEXT: .LBB0_6: @ %exit
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: f:
|
|
; V7A: @ %bb.0: @ %entry
|
|
; V7A-NEXT: mov r2, #0
|
|
; V7A-NEXT: tst r0, #1
|
|
; V7A-NEXT: str r2, [r1]
|
|
; V7A-NEXT: bne .LBB0_3
|
|
; V7A-NEXT: @ %bb.1: @ %if.then
|
|
; V7A-NEXT: tst r0, #14
|
|
; V7A-NEXT: beq .LBB0_6
|
|
; V7A-NEXT: @ %bb.2:
|
|
; V7A-NEXT: lsl r2, r0, #30
|
|
; V7A-NEXT: mov r3, #33024
|
|
; V7A-NEXT: and r2, r3, r2, asr #31
|
|
; V7A-NEXT: lsr r0, r0, #2
|
|
; V7A-NEXT: bfi r2, r0, #7, #1
|
|
; V7A-NEXT: bfi r2, r0, #14, #1
|
|
; V7A-NEXT: b .LBB0_5
|
|
; V7A-NEXT: .LBB0_3: @ %if.else
|
|
; V7A-NEXT: tst r0, #14
|
|
; V7A-NEXT: bxeq lr
|
|
; V7A-NEXT: .LBB0_4:
|
|
; V7A-NEXT: lsl r2, r0, #30
|
|
; V7A-NEXT: mov r3, #8256
|
|
; V7A-NEXT: and r2, r3, r2, asr #31
|
|
; V7A-NEXT: lsr r0, r0, #2
|
|
; V7A-NEXT: bfi r2, r0, #5, #1
|
|
; V7A-NEXT: bfi r2, r0, #12, #1
|
|
; V7A-NEXT: .LBB0_5: @ %if.end
|
|
; V7A-NEXT: str r2, [r1]
|
|
; V7A-NEXT: .LBB0_6: @ %exit
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: f:
|
|
; V7A-T: @ %bb.0: @ %entry
|
|
; V7A-T-NEXT: movs r2, #0
|
|
; V7A-T-NEXT: str r2, [r1]
|
|
; V7A-T-NEXT: lsls r2, r0, #31
|
|
; V7A-T-NEXT: bne .LBB0_3
|
|
; V7A-T-NEXT: @ %bb.1: @ %if.then
|
|
; V7A-T-NEXT: tst.w r0, #14
|
|
; V7A-T-NEXT: beq .LBB0_6
|
|
; V7A-T-NEXT: @ %bb.2:
|
|
; V7A-T-NEXT: lsls r2, r0, #30
|
|
; V7A-T-NEXT: mov.w r3, #33024
|
|
; V7A-T-NEXT: and.w r2, r3, r2, asr #31
|
|
; V7A-T-NEXT: lsrs r0, r0, #2
|
|
; V7A-T-NEXT: bfi r2, r0, #7, #1
|
|
; V7A-T-NEXT: bfi r2, r0, #14, #1
|
|
; V7A-T-NEXT: b .LBB0_5
|
|
; V7A-T-NEXT: .LBB0_3: @ %if.else
|
|
; V7A-T-NEXT: tst.w r0, #14
|
|
; V7A-T-NEXT: it eq
|
|
; V7A-T-NEXT: bxeq lr
|
|
; V7A-T-NEXT: .LBB0_4:
|
|
; V7A-T-NEXT: lsls r2, r0, #30
|
|
; V7A-T-NEXT: mov.w r3, #8256
|
|
; V7A-T-NEXT: and.w r2, r3, r2, asr #31
|
|
; V7A-T-NEXT: lsrs r0, r0, #2
|
|
; V7A-T-NEXT: bfi r2, r0, #5, #1
|
|
; V7A-T-NEXT: bfi r2, r0, #12, #1
|
|
; V7A-T-NEXT: .LBB0_5: @ %if.end
|
|
; V7A-T-NEXT: str r2, [r1]
|
|
; V7A-T-NEXT: .LBB0_6: @ %exit
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: f:
|
|
; V6M: @ %bb.0: @ %entry
|
|
; V6M-NEXT: .save {r4, lr}
|
|
; V6M-NEXT: push {r4, lr}
|
|
; V6M-NEXT: movs r2, #0
|
|
; V6M-NEXT: str r2, [r1]
|
|
; V6M-NEXT: movs r3, #14
|
|
; V6M-NEXT: ands r3, r0
|
|
; V6M-NEXT: movs r4, #4
|
|
; V6M-NEXT: ands r4, r0
|
|
; V6M-NEXT: movs r2, #2
|
|
; V6M-NEXT: ands r2, r0
|
|
; V6M-NEXT: lsls r0, r0, #31
|
|
; V6M-NEXT: bne .LBB0_5
|
|
; V6M-NEXT: @ %bb.1: @ %if.then
|
|
; V6M-NEXT: movs r0, #129
|
|
; V6M-NEXT: cmp r2, #0
|
|
; V6M-NEXT: beq .LBB0_3
|
|
; V6M-NEXT: @ %bb.2:
|
|
; V6M-NEXT: lsls r2, r0, #8
|
|
; V6M-NEXT: .LBB0_3: @ %if.then
|
|
; V6M-NEXT: cmp r4, #0
|
|
; V6M-NEXT: beq .LBB0_10
|
|
; V6M-NEXT: @ %bb.4: @ %if.then
|
|
; V6M-NEXT: lsls r0, r0, #7
|
|
; V6M-NEXT: b .LBB0_9
|
|
; V6M-NEXT: .LBB0_5: @ %if.else
|
|
; V6M-NEXT: movs r0, #129
|
|
; V6M-NEXT: cmp r2, #0
|
|
; V6M-NEXT: beq .LBB0_7
|
|
; V6M-NEXT: @ %bb.6:
|
|
; V6M-NEXT: lsls r2, r0, #6
|
|
; V6M-NEXT: .LBB0_7: @ %if.else
|
|
; V6M-NEXT: cmp r4, #0
|
|
; V6M-NEXT: beq .LBB0_10
|
|
; V6M-NEXT: @ %bb.8: @ %if.else
|
|
; V6M-NEXT: lsls r0, r0, #5
|
|
; V6M-NEXT: .LBB0_9: @ %if.else
|
|
; V6M-NEXT: adds r2, r2, r0
|
|
; V6M-NEXT: .LBB0_10: @ %if.else
|
|
; V6M-NEXT: cmp r3, #0
|
|
; V6M-NEXT: beq .LBB0_12
|
|
; V6M-NEXT: @ %bb.11: @ %if.end
|
|
; V6M-NEXT: str r2, [r1]
|
|
; V6M-NEXT: .LBB0_12: @ %exit
|
|
; V6M-NEXT: pop {r4, pc}
|
|
entry:
|
|
; *%outp is zeroed before any branching.
store i32 0, ptr %outp, align 4
|
|
; Bit 0 of %v selects the arm: clear -> %if.then, set -> %if.else.
%and = and i32 %v, 1
|
|
%cmp = icmp eq i32 %and, 0
|
|
; Bit 1 (mask 2): its compare feeds the first select in each arm.
%and1 = and i32 %v, 2
|
|
%tobool.not = icmp eq i32 %and1, 0
|
|
; Bit 2 (mask 4): its compare feeds the second select in each arm.
%and2 = and i32 %v, 4
|
|
%tobool1.not = icmp eq i32 %and2, 0
|
|
; Bits 1-3 (mask 14): its compare feeds the branch that ends each arm.
%and3 = and i32 %v, 14
|
|
%tobool2.not = icmp eq i32 %and3, 0
|
|
br i1 %cmp, label %if.then, label %if.else
|
|
|
|
if.then:
|
|
; %select = bit 1 set ? 33024 (0x8100) : 0
%select = select i1 %tobool.not, i32 0, i32 33024
|
|
; %spec.select = bit 2 set ? (%select | 16512 (0x4080)) : %select
%or = or i32 %select, 16512
|
|
%spec.select = select i1 %tobool1.not, i32 %select, i32 %or
|
|
; Skip the final store when (%v & 14) == 0.
br i1 %tobool2.not, label %exit, label %if.end
|
|
|
|
if.else:
|
|
; Same shape as %if.then with different constants:
; %select1 = bit 1 set ? 8256 (0x2040) : 0
%select1 = select i1 %tobool.not, i32 0, i32 8256
|
|
; %spec.select1 = bit 2 set ? (%select1 | 4128 (0x1020)) : %select1
%or1 = or i32 %select1, 4128
|
|
%spec.select1 = select i1 %tobool1.not, i32 %select1, i32 %or1
|
|
; Skip the final store when (%v & 14) == 0.
br i1 %tobool2.not, label %exit, label %if.end
|
|
|
|
if.end:
|
|
; Merge the value computed by whichever arm ran and store it to *%outp.
%spec.select.sink = phi i32 [ %spec.select, %if.then ], [ %spec.select1, %if.else ]
|
|
store i32 %spec.select.sink, ptr %outp, align 4
|
|
br label %exit
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Test with a mask that can be encoded with T32 instruction set, but not with A32.
|
|
define i32 @f0(i1 %c0, i32 %v) {
; Purpose: `%v & 16843009` (0x01010101) is computed once in %E and compared
; with zero separately in %A and in %B.
; In the expectations below, the thumbv7m and thumbv7a outputs use
; `tst.w r1, #16843009` inside %A and %B; the armv7a output builds the
; constant with movw/movt and keeps the `and` in %E; the armv6m output loads
; the constant from a literal pool and keeps the `ands` in %E.
|
|
; V7M-LABEL: f0:
|
|
; V7M: @ %bb.0: @ %E
|
|
; V7M-NEXT: lsls r0, r0, #31
|
|
; V7M-NEXT: beq .LBB1_2
|
|
; V7M-NEXT: @ %bb.1: @ %A
|
|
; V7M-NEXT: tst.w r1, #16843009
|
|
; V7M-NEXT: itt eq
|
|
; V7M-NEXT: moveq r0, #0
|
|
; V7M-NEXT: bxeq lr
|
|
; V7M-NEXT: b .LBB1_3
|
|
; V7M-NEXT: .LBB1_2: @ %B
|
|
; V7M-NEXT: tst.w r1, #16843009
|
|
; V7M-NEXT: itt ne
|
|
; V7M-NEXT: movne r0, #0
|
|
; V7M-NEXT: bxne lr
|
|
; V7M-NEXT: .LBB1_3: @ %D
|
|
; V7M-NEXT: movs r0, #1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: f0:
|
|
; V7A: @ %bb.0: @ %E
|
|
; V7A-NEXT: movw r2, #257
|
|
; V7A-NEXT: tst r0, #1
|
|
; V7A-NEXT: movt r2, #257
|
|
; V7A-NEXT: and r1, r1, r2
|
|
; V7A-NEXT: beq .LBB1_3
|
|
; V7A-NEXT: @ %bb.1: @ %A
|
|
; V7A-NEXT: cmp r1, #0
|
|
; V7A-NEXT: moveq r0, #0
|
|
; V7A-NEXT: bxeq lr
|
|
; V7A-NEXT: .LBB1_2: @ %D
|
|
; V7A-NEXT: mov r0, #1
|
|
; V7A-NEXT: bx lr
|
|
; V7A-NEXT: .LBB1_3: @ %B
|
|
; V7A-NEXT: mov r0, #0
|
|
; V7A-NEXT: cmp r1, #0
|
|
; V7A-NEXT: moveq r0, #1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: f0:
|
|
; V7A-T: @ %bb.0: @ %E
|
|
; V7A-T-NEXT: lsls r0, r0, #31
|
|
; V7A-T-NEXT: beq .LBB1_2
|
|
; V7A-T-NEXT: @ %bb.1: @ %A
|
|
; V7A-T-NEXT: tst.w r1, #16843009
|
|
; V7A-T-NEXT: itt eq
|
|
; V7A-T-NEXT: moveq r0, #0
|
|
; V7A-T-NEXT: bxeq lr
|
|
; V7A-T-NEXT: b .LBB1_3
|
|
; V7A-T-NEXT: .LBB1_2: @ %B
|
|
; V7A-T-NEXT: tst.w r1, #16843009
|
|
; V7A-T-NEXT: itt ne
|
|
; V7A-T-NEXT: movne r0, #0
|
|
; V7A-T-NEXT: bxne lr
|
|
; V7A-T-NEXT: .LBB1_3: @ %D
|
|
; V7A-T-NEXT: movs r0, #1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: f0:
|
|
; V6M: @ %bb.0: @ %E
|
|
; V6M-NEXT: ldr r2, .LCPI1_0
|
|
; V6M-NEXT: ands r2, r1
|
|
; V6M-NEXT: lsls r0, r0, #31
|
|
; V6M-NEXT: beq .LBB1_3
|
|
; V6M-NEXT: @ %bb.1: @ %A
|
|
; V6M-NEXT: cmp r2, #0
|
|
; V6M-NEXT: bne .LBB1_5
|
|
; V6M-NEXT: @ %bb.2:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .LBB1_3: @ %B
|
|
; V6M-NEXT: cmp r2, #0
|
|
; V6M-NEXT: beq .LBB1_5
|
|
; V6M-NEXT: @ %bb.4:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .LBB1_5: @ %D
|
|
; V6M-NEXT: movs r0, #1
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .p2align 2
|
|
; V6M-NEXT: @ %bb.6:
|
|
; V6M-NEXT: .LCPI1_0:
|
|
; V6M-NEXT: .long 16843009 @ 0x1010101
|
|
E:
|
|
; The masked value is defined here, but its only users are the compares in
; %A and %B.
%a = and i32 %v, 16843009
|
|
br i1 %c0, label %A, label %B
|
|
|
|
A:
|
|
; %c0 true: masked value == 0 -> %C (result 0), otherwise %D (result 1).
%c1 = icmp eq i32 %a, 0
|
|
br i1 %c1, label %C, label %D
|
|
|
|
B:
|
|
; %c0 false: same compare, opposite targets: == 0 -> %D (result 1),
; otherwise %C (result 0).
%c2 = icmp eq i32 %a, 0
|
|
br i1 %c2, label %D, label %C
|
|
|
|
C:
|
|
br label %X
|
|
|
|
D:
|
|
br label %X
|
|
|
|
X:
|
|
; Returns 0 when control arrives via %C and 1 when it arrives via %D.
%x = phi i32 [0, %C], [1, %D]
|
|
ret i32 %x
|
|
}
|
|
|
|
; Test with a mask that can be encoded both with T32 and A32 instruction sets.
|
|
define i32 @f1(i1 %c0, i32 %v) {
; Purpose: same control flow as @f0, but with mask 100663296 (0x06000000,
; i.e. 3 << 25).
; In the expectations below, the thumbv7m, armv7a and thumbv7a outputs all
; test the mask as an immediate with `tst`/`tst.w` inside %A and %B; the
; armv6m output builds the mask with movs #3 / lsls #25 and keeps the `ands`
; in %E.
|
|
; V7M-LABEL: f1:
|
|
; V7M: @ %bb.0: @ %E
|
|
; V7M-NEXT: lsls r0, r0, #31
|
|
; V7M-NEXT: beq .LBB2_2
|
|
; V7M-NEXT: @ %bb.1: @ %A
|
|
; V7M-NEXT: tst.w r1, #100663296
|
|
; V7M-NEXT: itt eq
|
|
; V7M-NEXT: moveq r0, #0
|
|
; V7M-NEXT: bxeq lr
|
|
; V7M-NEXT: b .LBB2_3
|
|
; V7M-NEXT: .LBB2_2: @ %B
|
|
; V7M-NEXT: tst.w r1, #100663296
|
|
; V7M-NEXT: itt ne
|
|
; V7M-NEXT: movne r0, #0
|
|
; V7M-NEXT: bxne lr
|
|
; V7M-NEXT: .LBB2_3: @ %D
|
|
; V7M-NEXT: movs r0, #1
|
|
; V7M-NEXT: bx lr
|
|
;
|
|
; V7A-LABEL: f1:
|
|
; V7A: @ %bb.0: @ %E
|
|
; V7A-NEXT: tst r0, #1
|
|
; V7A-NEXT: beq .LBB2_3
|
|
; V7A-NEXT: @ %bb.1: @ %A
|
|
; V7A-NEXT: tst r1, #100663296
|
|
; V7A-NEXT: moveq r0, #0
|
|
; V7A-NEXT: bxeq lr
|
|
; V7A-NEXT: .LBB2_2: @ %D
|
|
; V7A-NEXT: mov r0, #1
|
|
; V7A-NEXT: bx lr
|
|
; V7A-NEXT: .LBB2_3: @ %B
|
|
; V7A-NEXT: mov r0, #0
|
|
; V7A-NEXT: tst r1, #100663296
|
|
; V7A-NEXT: moveq r0, #1
|
|
; V7A-NEXT: bx lr
|
|
;
|
|
; V7A-T-LABEL: f1:
|
|
; V7A-T: @ %bb.0: @ %E
|
|
; V7A-T-NEXT: lsls r0, r0, #31
|
|
; V7A-T-NEXT: beq .LBB2_2
|
|
; V7A-T-NEXT: @ %bb.1: @ %A
|
|
; V7A-T-NEXT: tst.w r1, #100663296
|
|
; V7A-T-NEXT: itt eq
|
|
; V7A-T-NEXT: moveq r0, #0
|
|
; V7A-T-NEXT: bxeq lr
|
|
; V7A-T-NEXT: b .LBB2_3
|
|
; V7A-T-NEXT: .LBB2_2: @ %B
|
|
; V7A-T-NEXT: tst.w r1, #100663296
|
|
; V7A-T-NEXT: itt ne
|
|
; V7A-T-NEXT: movne r0, #0
|
|
; V7A-T-NEXT: bxne lr
|
|
; V7A-T-NEXT: .LBB2_3: @ %D
|
|
; V7A-T-NEXT: movs r0, #1
|
|
; V7A-T-NEXT: bx lr
|
|
;
|
|
; V6M-LABEL: f1:
|
|
; V6M: @ %bb.0: @ %E
|
|
; V6M-NEXT: movs r2, #3
|
|
; V6M-NEXT: lsls r2, r2, #25
|
|
; V6M-NEXT: ands r2, r1
|
|
; V6M-NEXT: lsls r0, r0, #31
|
|
; V6M-NEXT: beq .LBB2_3
|
|
; V6M-NEXT: @ %bb.1: @ %A
|
|
; V6M-NEXT: cmp r2, #0
|
|
; V6M-NEXT: bne .LBB2_5
|
|
; V6M-NEXT: @ %bb.2:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .LBB2_3: @ %B
|
|
; V6M-NEXT: cmp r2, #0
|
|
; V6M-NEXT: beq .LBB2_5
|
|
; V6M-NEXT: @ %bb.4:
|
|
; V6M-NEXT: movs r0, #0
|
|
; V6M-NEXT: bx lr
|
|
; V6M-NEXT: .LBB2_5: @ %D
|
|
; V6M-NEXT: movs r0, #1
|
|
; V6M-NEXT: bx lr
|
|
E:
|
|
; The masked value is defined here, but its only users are the compares in
; %A and %B.
%a = and i32 %v, 100663296
|
|
br i1 %c0, label %A, label %B
|
|
|
|
A:
|
|
; %c0 true: masked value == 0 -> %C (result 0), otherwise %D (result 1).
%c1 = icmp eq i32 %a, 0
|
|
br i1 %c1, label %C, label %D
|
|
|
|
B:
|
|
; %c0 false: same compare, opposite targets: == 0 -> %D (result 1),
; otherwise %C (result 0).
%c2 = icmp eq i32 %a, 0
|
|
br i1 %c2, label %D, label %C
|
|
|
|
C:
|
|
br label %X
|
|
|
|
D:
|
|
br label %X
|
|
|
|
X:
|
|
; Returns 0 when control arrives via %C and 1 when it arrives via %D.
%x = phi i32 [0, %C], [1, %D]
|
|
ret i32 %x
|
|
}
|