LSR may suggest a less profitable transformation for the loop. This patch adds a check to prevent LSR from generating worse code than what we already have. Since LSR affects nearly all targets, the check is guarded by the option 'lsr-drop-solution' and is disabled by default for now. The next step is to extend a TTI interface so that targets can opt in to this enhancement. A debug message is emitted when the LSR solution is dropped, to remind the user of that choice.

Reviewed By: Meinersbur, #loopoptwg

Differential Revision: https://reviews.llvm.org/D126043
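As a usage sketch (the invocations below are illustrative and not part of the patch itself), the cl::opt can be enabled directly on llc or opt, or forwarded from clang via -mllvm:

  llc -O3 -lsr-drop-solution input.ll
  opt -passes=loop-reduce -lsr-drop-solution -S input.ll
  clang -O2 -mllvm -lsr-drop-solution input.c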
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -mattr=+v -lsr-drop-solution | FileCheck --check-prefix=CHECK %s

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux-gnu"

define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a2, e8, m8, ta, mu
; CHECK-NEXT: bne a4, a2, .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.then
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: .LBB0_3: # %do.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: add a3, a3, a4
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: bltu a3, a5, .LBB0_3
; CHECK-NEXT: # %bb.4: # %do.end
; CHECK-NEXT: sub a2, a2, a3
; CHECK-NEXT: vsetvli a2, a2, e8, m8, ta, mu
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: ret
entry:
  %0 = ptrtoint ptr %a0 to i64
  %1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
  %cmp.not = icmp eq i64 %1, %a2
  br i1 %cmp.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %add = add i64 %0, %a2
  %sub = sub i64 %add, %1
  br label %do.body

do.body:                                          ; preds = %do.body, %if.then
  %a3.0 = phi i64 [ %0, %if.then ], [ %add1, %do.body ]
  %a1.addr.0 = phi ptr [ %a1, %if.then ], [ %add.ptr, %do.body ]
  %2 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.0, i64 %1)
  %3 = inttoptr i64 %a3.0 to ptr
  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %2, ptr %3, i64 %1)
  %add1 = add i64 %a3.0, %1
  %add.ptr = getelementptr i8, ptr %a1.addr.0, i64 %1
  %cmp2 = icmp ugt i64 %sub, %add1
  br i1 %cmp2, label %do.body, label %do.end

do.end:                                           ; preds = %do.body
  %sub4 = sub i64 %add, %add1
  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub4, i64 0, i64 3)
  br label %if.end

if.end:                                           ; preds = %do.end, %entry
  %a3.1 = phi i64 [ %add1, %do.end ], [ %0, %entry ]
  %t0.0 = phi i64 [ %4, %do.end ], [ %a2, %entry ]
  %a1.addr.1 = phi ptr [ %add.ptr, %do.end ], [ %a1, %entry ]
  %5 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.1, i64 %t0.0)
  %6 = inttoptr i64 %a3.1 to ptr
  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %5, ptr %6, i64 %t0.0)
  ret ptr %a0
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)

declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)

declare void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)