This matches the behavior of a number of other targets, including X86. It does have the effect of increasing register pressure slightly, but we have a relative abundance of registers in the ISA compared to other targets which use the same heuristic. The motivation here is that our current cost heuristic treats the number of registers as the dominant cost. As a result, an extra use outside of a loop can radically change the LSR result. As an example, consider test4 from the recently added test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll. Without a use outside the loop (see test3), we convert the IV into a pointer increment. With one, we leave the gep in place. The pointer increment version both decreases the number of instructions in some loops and creates parallel chains of computation (i.e. decreases critical path depth). Both are generally profitable. Arguably, we should really be using a more sophisticated model here, e.g. using profile information or explicitly modeling parallelism gains. However, as a practical matter, starting with the same mild hack that other targets have used seems reasonable. Differential Revision: https://reviews.llvm.org/D142227
98 lines
2.9 KiB
LLVM
98 lines
2.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32
|
|
; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64
|
|
|
|
; Test case:
|
|
; - `A[row]` is loop invariant and should be hoisted up to preheader
|
|
; FIXME: RV32 is working as expected, but RV64 isn't
|
|
|
|
; The following LLVM IR simulates:
|
|
; int A[16][16];
|
|
; void test(int row, int N) {
|
|
; for (int i=0; i<N; ++i) {
|
|
; A[row][i+1] = 4;
|
|
; A[row][i+2] = 5;
|
|
; }
|
|
; }
|
|
|
|
; After LSR:
|
|
; int A[16][16];
|
|
; void test(int row, int N) {
|
|
; for (int *ptr = &A[row][2]; N>0; N--) {
|
|
; *(ptr-1) = 4;
|
|
; *(ptr) = 5;
|
|
; ++ptr;
|
|
; }
|
|
; }
|
|
|
|
; 16x16 array of i32 that @test stores into; zero-initialized, internal linkage.
@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <ptr> [#uses=2]
|
|
|
|
; @test(row, N.in): when N.in > 0, runs a loop whose induction variable starts
; at 0 and, on each iteration, stores 4 to A[row][indvar+1] and 5 to
; A[row][indvar+2]; the loop exits once indvar+1 equals N.
; The RV32/RV64 lines below are FileCheck assertions on llc's output: the RV32
; loop body uses one pointer (a0) for both stores, while the RV64 loop body
; still forms the second store's address with slli/add on every iteration.
define void @test(i32 signext %row, i32 signext %N.in) nounwind {
|
|
; RV32-LABEL: test:
|
|
; RV32: # %bb.0: # %entry
|
|
; RV32-NEXT: blez a1, .LBB0_3
|
|
; RV32-NEXT: # %bb.1: # %cond_true.preheader
|
|
; RV32-NEXT: slli a0, a0, 6
|
|
; RV32-NEXT: lui a2, %hi(A)
|
|
; RV32-NEXT: addi a2, a2, %lo(A)
|
|
; RV32-NEXT: add a0, a0, a2
|
|
; RV32-NEXT: addi a0, a0, 8
|
|
; RV32-NEXT: li a2, 4
|
|
; RV32-NEXT: li a3, 5
|
|
; RV32-NEXT: .LBB0_2: # %cond_true
|
|
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV32-NEXT: sw a2, -4(a0)
|
|
; RV32-NEXT: sw a3, 0(a0)
|
|
; RV32-NEXT: addi a1, a1, -1
|
|
; RV32-NEXT: addi a0, a0, 4
|
|
; RV32-NEXT: bnez a1, .LBB0_2
|
|
; RV32-NEXT: .LBB0_3: # %return
|
|
; RV32-NEXT: ret
|
|
;
|
|
; RV64-LABEL: test:
|
|
; RV64: # %bb.0: # %entry
|
|
; RV64-NEXT: blez a1, .LBB0_3
|
|
; RV64-NEXT: # %bb.1: # %cond_true.preheader
|
|
; RV64-NEXT: negw a1, a1
|
|
; RV64-NEXT: slli a0, a0, 6
|
|
; RV64-NEXT: lui a2, %hi(A)
|
|
; RV64-NEXT: addi a2, a2, %lo(A)
|
|
; RV64-NEXT: add a0, a0, a2
|
|
; RV64-NEXT: addi a2, a0, 4
|
|
; RV64-NEXT: li a3, 2
|
|
; RV64-NEXT: li a4, 4
|
|
; RV64-NEXT: li a5, 5
|
|
; RV64-NEXT: li a6, 2
|
|
; RV64-NEXT: .LBB0_2: # %cond_true
|
|
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV64-NEXT: sw a4, 0(a2)
|
|
; RV64-NEXT: slli a7, a6, 2
|
|
; RV64-NEXT: add a7, a0, a7
|
|
; RV64-NEXT: sw a5, 0(a7)
|
|
; RV64-NEXT: addiw a6, a6, 1
|
|
; RV64-NEXT: addw a7, a1, a6
|
|
; RV64-NEXT: addi a2, a2, 4
|
|
; RV64-NEXT: bne a7, a3, .LBB0_2
|
|
; RV64-NEXT: .LBB0_3: # %return
|
|
; RV64-NEXT: ret
|
|
entry:
|
|
; %N is a same-type (i32 -> i32) bitcast of %N.in, i.e. just another name for it.
%N = bitcast i32 %N.in to i32
|
|
; Guard: enter the loop only when N.in > 0 (signed compare).
%tmp5 = icmp sgt i32 %N.in, 0
|
|
br i1 %tmp5, label %cond_true, label %return
|
|
|
|
cond_true:
|
|
; Induction variable: 0 on entry from %entry, %indvar.next on the back edge.
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ]
|
|
%tmp2 = add i32 %indvar, 1
|
|
; %tmp = &A[row][indvar + 1]
%tmp = getelementptr [16 x [16 x i32]], ptr @A, i32 0, i32 %row, i32 %tmp2
|
|
store i32 4, ptr %tmp
|
|
%tmp5.upgrd.1 = add i32 %indvar, 2
|
|
; %tmp7 = &A[row][indvar + 2]
%tmp7 = getelementptr [16 x [16 x i32]], ptr @A, i32 0, i32 %row, i32 %tmp5.upgrd.1
|
|
store i32 5, ptr %tmp7
|
|
%indvar.next = add i32 %indvar, 1
|
|
; Leave the loop once the incremented induction variable equals N.
%exitcond = icmp eq i32 %indvar.next, %N
|
|
br i1 %exitcond, label %return, label %cond_true
|
|
|
|
return:
|
|
ret void
|
|
}
|