Files
clang-p2996/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
Mingming Liu f62d8a1a50 [AArch64] Compare BFI and ORR with left-shifted operand for OR instruction selection.
Before this patch:
- For `r = or op0, op1`, `tryBitfieldInsertOpFromOr` combines it into a BFI when
  1) one of the two operands is a bit-field-positioning or bit-field-extraction op; and
  2) the bits from the two operands don't overlap.

After this patch:
- Right before the OR is combined into a BFI, evaluate whether an ORR with a left-shifted operand is better.

A motivating example (https://godbolt.org/z/rnMrzs5vn, which is added as a test case in `test_orr_not_bfi` in `CodeGen/AArch64/bitfield-insert.ll`)

For IR:
```
define i64 @test_orr_not_bfxil(i64 %0) {
  %2 = and i64 %0, 127
  %3 = lshr i64 %0, 1
  %4 = and i64 %3, 16256
  %5 = or i64 %4, %2
  ret i64 %5
}
```

Before:
```
   lsr     x8, x0, #1
   and     x8, x8, #0x3f80
   bfxil   x8, x0, #0, #7
```

After:
```
   ubfx x8, x0, #8, #7
   and x9, x0, #0x7f
   orr x0, x9, x8, lsl #7
```

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D135102
2022-11-03 12:32:08 -07:00

150 lines
5.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test pipeline: opt runs the type-promotion pass, the result is lowered by llc
; for aarch64-linux-gnu, and the emitted assembly is verified with FileCheck.
; RUN: opt -mtriple=aarch64-linux-gnu -type-promotion < %s | llc -mtriple=aarch64-linux-gnu -o - | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; Pointer-linked aggregate types walked by the loops in the test functions.
; %struct.bar wraps %struct.foo, which holds a single %struct.wobble pointer.
%struct.bar = type { %struct.foo }
%struct.foo = type { %struct.wobble* }
; %struct.wobble holds a single %struct.zot pointer.
%struct.wobble = type { %struct.zot* }
; %struct.zot is a packed struct (declared with <{ ... }>): an embedded
; %struct.wobble, a %struct.zot pointer, a %struct.wobble pointer, an i8 and
; seven trailing i8 elements.
%struct.zot = type <{ %struct.wobble, %struct.zot*, %struct.wobble*, i8, [7 x i8] }>
; External global; the test functions load their starting %struct.wobble
; pointer from its innermost first field.
@global = external global %struct.bar, align 8
; Loop that merges bit-fields into an i64 accumulator. In this variant the
; loop-carried values (%var4 and %var6) both start at zero.
; Each iteration loads an i16 from the current node and builds the next
; accumulator value as:
;   bits 0-7   : low byte of the loaded halfword
;   bits 8-15  : high byte of the halfword, or the carried %var6 when the
;                halfword is zero
;   bit  16    : set when the halfword is non-zero
;   bits 32-63 : kept from the previous accumulator value
; The expected assembly below combines these fields with ORR instructions
; that use left-shifted operands.
define i64 @bfis_in_loop_zero() {
; CHECK-LABEL: bfis_in_loop_zero:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x9, :got:global
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: .LBB0_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: ubfx x11, x10, #8, #24
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
; CHECK-NEXT: ldr x11, [x9, #8]
; CHECK-NEXT: and x9, x10, #0xff
; CHECK-NEXT: and x10, x0, #0xffffffff00000000
; CHECK-NEXT: orr x9, x9, x8, lsl #8
; CHECK-NEXT: orr x10, x10, x12, lsl #16
; CHECK-NEXT: orr x0, x10, x9
; CHECK-NEXT: ldr x9, [x11, #16]
; CHECK-NEXT: cbnz x11, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
; Load the starting %struct.wobble pointer out of @global.
%var = load %struct.wobble*, %struct.wobble** getelementptr inbounds (%struct.bar, %struct.bar* @global, i64 0, i32 0, i32 0), align 8
br label %preheader
preheader:
br label %header
header: ; preds = %latch, %preheader
; Loop-carried state: %var4 = i64 accumulator, %var5 = current node pointer,
; %var6 = byte carried over from the previous iteration.
%var4 = phi i64 [ %var30, %latch ], [ 0, %preheader ]
%var5 = phi %struct.wobble* [ %var38, %latch ], [ %var, %preheader ]
%var6 = phi i8 [ %var21, %latch ], [ 0, %preheader ]
br label %midblock
midblock: ; preds = %header
; Load the i16 located nine %struct.wobble elements past %var5 (byte offset 72
; in the expected assembly).
%var15 = getelementptr inbounds %struct.wobble, %struct.wobble* %var5, i64 9
%var16 = bitcast %struct.wobble* %var15 to i16*
%var17 = load i16, i16* %var16, align 8
%var18 = icmp eq i16 %var17, 0
; %var21 = high byte of the halfword, or the carried %var6 if the halfword is 0.
%var19 = lshr i16 %var17, 8
%var20 = trunc i16 %var19 to i8
%var21 = select i1 %var18, i8 %var6, i8 %var20
; %var25 = (%var21 << 8) | low byte of the halfword.
%var22 = zext i8 %var21 to i16
%var23 = shl nuw i16 %var22, 8
%var24 = and i16 %var17, 255
%var25 = or i16 %var23, %var24
; 65536 sets bit 16 when the halfword is non-zero.
%var26 = select i1 %var18, i64 0, i64 65536
%var27 = zext i16 %var25 to i64
; -4294967296 is 0xffffffff00000000: keep only the upper 32 bits of %var4.
%var28 = and i64 %var4, -4294967296
%var29 = or i64 %var26, %var28
%var30 = or i64 %var29, %var27
br label %latch
latch: ; preds = %midblock
; Follow the links: %var35 is the %struct.zot pointer stored one element past
; %var5, and %var38 (the next %var5) is loaded from field 2 of that struct.
%var34 = getelementptr inbounds %struct.wobble, %struct.wobble* %var5, i64 1, i32 0
%var35 = load %struct.zot*, %struct.zot** %var34, align 8
%var36 = icmp eq %struct.zot* %var35, null
%var37 = getelementptr inbounds %struct.zot, %struct.zot* %var35, i64 0, i32 2
%var38 = load %struct.wobble*, %struct.wobble** %var37, align 8
; Leave the loop once %var35 is null; otherwise iterate again.
br i1 %var36, label %exit, label %header
exit:
ret i64 %var30
}
; Loop that merges bit-fields into an i64 accumulator. In this variant the
; loop-carried values %var4 and %var6 start as undef rather than zero; the
; expected assembly therefore shows x0 as an implicit-def instead of being
; zeroed before the loop.
; Each iteration loads an i16 from the current node and builds the next
; accumulator value as:
;   bits 0-7   : low byte of the loaded halfword
;   bits 8-15  : high byte of the halfword, or the carried %var6 when the
;                halfword is zero
;   bit  16    : set when the halfword is non-zero
;   bits 32-63 : kept from the previous accumulator value
; The expected assembly below combines these fields with ORR instructions
; that use left-shifted operands.
define i64 @bfis_in_loop_undef() {
; CHECK-LABEL: bfis_in_loop_undef:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x9, :got:global
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: // implicit-def: $x0
; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: .LBB1_1: // %midblock
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh w10, [x9, #72]
; CHECK-NEXT: cmp w10, #0
; CHECK-NEXT: ubfx x11, x10, #8, #24
; CHECK-NEXT: cset w12, ne
; CHECK-NEXT: csel w8, w8, w11, eq
; CHECK-NEXT: ldr x11, [x9, #8]
; CHECK-NEXT: and x9, x10, #0xff
; CHECK-NEXT: and x10, x0, #0xffffffff00000000
; CHECK-NEXT: orr x9, x9, x8, lsl #8
; CHECK-NEXT: orr x10, x10, x12, lsl #16
; CHECK-NEXT: orr x0, x10, x9
; CHECK-NEXT: ldr x9, [x11, #16]
; CHECK-NEXT: cbnz x11, .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
; Load the starting %struct.wobble pointer out of @global.
%var = load %struct.wobble*, %struct.wobble** getelementptr inbounds (%struct.bar, %struct.bar* @global, i64 0, i32 0, i32 0), align 8
br label %preheader
preheader:
br label %header
header: ; preds = %latch, %preheader
; Loop-carried state: %var4 = i64 accumulator, %var5 = current node pointer,
; %var6 = byte carried over from the previous iteration. The accumulator and
; the carried byte are undef on entry from %preheader.
%var4 = phi i64 [ %var30, %latch ], [ undef, %preheader ]
%var5 = phi %struct.wobble* [ %var38, %latch ], [ %var, %preheader ]
%var6 = phi i8 [ %var21, %latch ], [ undef, %preheader ]
br label %midblock
midblock: ; preds = %header
; Load the i16 located nine %struct.wobble elements past %var5 (byte offset 72
; in the expected assembly).
%var15 = getelementptr inbounds %struct.wobble, %struct.wobble* %var5, i64 9
%var16 = bitcast %struct.wobble* %var15 to i16*
%var17 = load i16, i16* %var16, align 8
%var18 = icmp eq i16 %var17, 0
; %var21 = high byte of the halfword, or the carried %var6 if the halfword is 0.
%var19 = lshr i16 %var17, 8
%var20 = trunc i16 %var19 to i8
%var21 = select i1 %var18, i8 %var6, i8 %var20
; %var25 = (%var21 << 8) | low byte of the halfword.
%var22 = zext i8 %var21 to i16
%var23 = shl nuw i16 %var22, 8
%var24 = and i16 %var17, 255
%var25 = or i16 %var23, %var24
; 65536 sets bit 16 when the halfword is non-zero.
%var26 = select i1 %var18, i64 0, i64 65536
%var27 = zext i16 %var25 to i64
; -4294967296 is 0xffffffff00000000: keep only the upper 32 bits of %var4.
%var28 = and i64 %var4, -4294967296
%var29 = or i64 %var26, %var28
%var30 = or i64 %var29, %var27
br label %latch
latch: ; preds = %midblock
; Follow the links: %var35 is the %struct.zot pointer stored one element past
; %var5, and %var38 (the next %var5) is loaded from field 2 of that struct.
%var34 = getelementptr inbounds %struct.wobble, %struct.wobble* %var5, i64 1, i32 0
%var35 = load %struct.zot*, %struct.zot** %var34, align 8
%var36 = icmp eq %struct.zot* %var35, null
%var37 = getelementptr inbounds %struct.zot, %struct.zot* %var35, i64 0, i32 2
%var38 = load %struct.wobble*, %struct.wobble** %var37, align 8
; Leave the loop once %var35 is null; otherwise iterate again.
br i1 %var36, label %exit, label %header
exit:
ret i64 %var30
}