Files
clang-p2996/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
Nikita Popov ddb46abd3c [LSR] Don't consider users of constant outside loop
In CollectLoopInvariantFixupsAndFormulae(), LSR looks at users
outside the loop. E.g. if we have an addrec based on %base, and
%base is also used outside the loop, then we have to keep it in a
register anyway, which may make it more profitable to use
%base + %idx style addressing.

This reasoning doesn't hold up when the base is a constant, because
the constant can be rematerialized. The lsr-memcpy.ll test regressed
when enabling opaque pointers, because inttoptr (i64 6442450944 to ptr)
now also has a use outside the loop (previously it didn't due to a
pointer type difference), and that extra "use" results in worse use
of addressing modes in the loop. However, the use outside the loop
actually gets rematerialized, so the alleged register saving does
not occur.

The same reasoning also applies to other types of constants, such
as global variable references.

Differential Revision: https://reviews.llvm.org/D155073
2023-07-13 12:22:38 +02:00

88 lines
3.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid < %s | FileCheck %s
; rdar://10232252
; Prevent LSR from making a poor choice that cannot be folded into an addressing mode.
; Remove the -pre-RA-sched=list-hybrid option after fixing:
; <rdar://problem/12702735> [ARM64][coalescer] need better register
; coalescing for simple unit tests.
; test_inttoptr: copy loop whose source and destination pointers start at
; constant addresses (inttoptr of 4294967296 and 6442450944). The destination
; constant is also used after the loop as the callee of an indirect call.
; The expected assembly below keeps post-indexed "ldr/str [xN], #8" addressing
; inside the loop and re-materializes the constant with a "mov" before "blr",
; i.e. the use of the constant outside the loop does not change the addressing
; mode chosen for the loop.
define i32 @test_inttoptr() nounwind {
; CHECK-LABEL: test_inttoptr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: mov w8, #1288 // =0x508
; CHECK-NEXT: mov x9, #4294967296 // =0x100000000
; CHECK-NEXT: mov x10, #6442450944 // =0x180000000
; CHECK-NEXT: .LBB0_1: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x11, [x9], #8
; CHECK-NEXT: str x11, [x10], #8
; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: b.pl .LBB0_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: mov x8, #6442450944 // =0x180000000
; CHECK-NEXT: blr x8
; CHECK-NEXT: mov w0, #0 // =0x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
br label %while.body
while.body: ; preds = %while.body, %entry
; Loop-carried state: a counter starting at 1288 and the two pointers, which
; start at the constant addresses and advance each iteration.
%len.06 = phi i64 [ 1288, %entry ], [ %sub, %while.body ]
%pDst.05 = phi ptr [ inttoptr (i64 6442450944 to ptr), %entry ], [ %incdec.ptr1, %while.body ]
%pSrc.04 = phi ptr [ inttoptr (i64 4294967296 to ptr), %entry ], [ %incdec.ptr, %while.body ]
; Copy one i64 from source to destination with volatile accesses and step both
; pointers forward by one i64 element.
%incdec.ptr = getelementptr inbounds i64, ptr %pSrc.04, i64 1
%tmp = load volatile i64, ptr %pSrc.04, align 8
%incdec.ptr1 = getelementptr inbounds i64, ptr %pDst.05, i64 1
store volatile i64 %tmp, ptr %pDst.05, align 8
; Decrement the counter by 8 and keep looping while the result is still
; greater than -1 (signed compare).
%sub = add i64 %len.06, -8
%cmp = icmp sgt i64 %sub, -1
br i1 %cmp, label %while.body, label %while.end
while.end: ; preds = %while.body
; Use of the destination base constant outside the loop: an indirect call
; through the same address the copy started writing to.
tail call void inttoptr (i64 6442450944 to ptr)() nounwind
ret i32 0
}
; External globals used by @test_globals as the initial destination (@g1) and
; source (@g2) pointers of its copy loop.
@g1 = external dso_local global i8
@g2 = external dso_local global i8
; test_globals: same copy loop as @test_inttoptr, but the pointers start at the
; addresses of the globals @g1 (destination) and @g2 (source), and @g1 is also
; used after the loop as the return value.
; The expected assembly below keeps post-indexed "ldr/str [xN], #8" addressing
; inside the loop and re-materializes the address of g1 with "adrp" + "add"
; into x0 after the loop.
define ptr @test_globals() nounwind {
; CHECK-LABEL: test_globals:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #1288 // =0x508
; CHECK-NEXT: adrp x9, g2
; CHECK-NEXT: add x9, x9, :lo12:g2
; CHECK-NEXT: adrp x10, g1
; CHECK-NEXT: add x10, x10, :lo12:g1
; CHECK-NEXT: .LBB1_1: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x11, [x9], #8
; CHECK-NEXT: str x11, [x10], #8
; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: b.pl .LBB1_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: adrp x0, g1
; CHECK-NEXT: add x0, x0, :lo12:g1
; CHECK-NEXT: ret
entry:
br label %while.body
while.body: ; preds = %while.body, %entry
; Loop-carried state: a counter starting at 1288 and the two pointers, which
; start at @g1 / @g2 and advance each iteration.
%len.06 = phi i64 [ 1288, %entry ], [ %sub, %while.body ]
%pDst.05 = phi ptr [ @g1, %entry ], [ %incdec.ptr1, %while.body ]
%pSrc.04 = phi ptr [ @g2, %entry ], [ %incdec.ptr, %while.body ]
; Copy one i64 from source to destination with volatile accesses and step both
; pointers forward by one i64 element.
%incdec.ptr = getelementptr inbounds i64, ptr %pSrc.04, i64 1
%tmp = load volatile i64, ptr %pSrc.04, align 8
%incdec.ptr1 = getelementptr inbounds i64, ptr %pDst.05, i64 1
store volatile i64 %tmp, ptr %pDst.05, align 8
; Decrement the counter by 8 and keep looping while the result is still
; greater than -1 (signed compare).
%sub = add i64 %len.06, -8
%cmp = icmp sgt i64 %sub, -1
br i1 %cmp, label %while.body, label %while.end
while.end: ; preds = %while.body
; Use of the destination base (@g1) outside the loop.
ret ptr @g1
}