The insertion point chosen for COPY instructions isn't always optimal and can lead to a worse block layout; see the regression test in the first commit. This change affects many architectures, but the total instruction count in the test cases appears to be slightly lower.
```llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -mattr=+v -lsr-drop-solution | FileCheck --check-prefix=CHECK %s

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux-gnu"

define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: vsetvli a4, a2, e8, m8, ta, ma
; CHECK-NEXT: beq a4, a2, .LBB0_4
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: .LBB0_2: # %do.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: add a3, a3, a4
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: bltu a3, a5, .LBB0_2
; CHECK-NEXT: # %bb.3: # %do.end
; CHECK-NEXT: sub a2, a2, a3
; CHECK-NEXT: vsetvli a2, a2, e8, m8, ta, ma
; CHECK-NEXT: .LBB0_4: # %if.end
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: ret
entry:
%0 = ptrtoint ptr %a0 to i64
%1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
%cmp.not = icmp eq i64 %1, %a2
br i1 %cmp.not, label %if.end, label %if.then

if.then: ; preds = %entry
%add = add i64 %0, %a2
%sub = sub i64 %add, %1
br label %do.body

do.body: ; preds = %do.body, %if.then
%a3.0 = phi i64 [ %0, %if.then ], [ %add1, %do.body ]
%a1.addr.0 = phi ptr [ %a1, %if.then ], [ %add.ptr, %do.body ]
%2 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.0, i64 %1)
%3 = inttoptr i64 %a3.0 to ptr
tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %2, ptr %3, i64 %1)
%add1 = add i64 %a3.0, %1
%add.ptr = getelementptr i8, ptr %a1.addr.0, i64 %1
%cmp2 = icmp ugt i64 %sub, %add1
br i1 %cmp2, label %do.body, label %do.end

do.end: ; preds = %do.body
%sub4 = sub i64 %add, %add1
%4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub4, i64 0, i64 3)
br label %if.end

if.end: ; preds = %do.end, %entry
%a3.1 = phi i64 [ %add1, %do.end ], [ %0, %entry ]
%t0.0 = phi i64 [ %4, %do.end ], [ %a2, %entry ]
%a1.addr.1 = phi ptr [ %add.ptr, %do.end ], [ %a1, %entry ]
%5 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.1, i64 %t0.0)
%6 = inttoptr i64 %a3.1 to ptr
tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %5, ptr %6, i64 %t0.0)
ret ptr %a0
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)

declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)

declare void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)
```
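As the NOTE line says, the CHECK lines in this test are autogenerated and should be regenerated rather than hand-edited after a codegen change. A minimal sketch of that workflow, assuming a standard llvm-project checkout, a build directory named `build`, and an illustrative test path (the actual file name may differ):

```sh
# Rebuild llc, then regenerate the autogenerated CHECK lines in place.
ninja -C build llc
llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
  llvm/test/CodeGen/RISCV/rvv/lsr-drop-solution.ll
```

The script re-runs the RUN line above through llc and rewrites the CHECK blocks, so block-layout differences like the one described here show up directly in the test diff.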