clang-p2996/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
Commit f5c62ee0fa by Guy David, 2025-06-29: [PHIElimination] Reuse existing COPY in predecessor basic block (#131837)
The insertion point of the COPY is not always optimal and can eventually
lead to a worse block layout; see the regression test in the first
commit.

This change affects many architectures, but the total number of
instructions across the test cases appears to be slightly lower.
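
A rough sketch of the idea, in illustrative pseudo-MIR rather than code from the patch: PHI elimination lowers each PHI by materializing a COPY in every predecessor block. When a predecessor already contains a COPY defining the incoming value, that instruction can be retargeted to produce the PHI's destination directly, instead of appending a second COPY at the end of the block:

    ; Incoming: %dst = PHI %a, %bb.0, ...  where %a is defined in %bb.0 by a COPY.
    bb.0:
      %a = COPY %x
      ...
      %dst = COPY %a      ; naive lowering appends a fresh COPY per predecessor
    ; With the reuse (sketch, assuming %a has no other uses): the existing
    ; COPY is rewritten, so nothing new is appended at the end of bb.0.
    bb.0:
      %dst = COPY %x
      ...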

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -mattr=+v -lsr-drop-solution | FileCheck --check-prefix=CHECK %s
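; The -lsr-drop-solution flag asks LSR to discard its chosen solution when it
; is not more profitable than the baseline; this test pins down the code
; generated on that fallback path.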

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux-gnu"

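; @foo is a strip-mined RVV byte copy: it moves %a2 bytes from %a1 to %a0 in
; e8/m8 vector chunks, takes a single-vsetvli fast path when the whole count
; fits in one VL, and returns the destination pointer.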
define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mv a3, a0
; CHECK-NEXT:    vsetvli a4, a2, e8, m8, ta, ma
; CHECK-NEXT:    beq a4, a2, .LBB0_4
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    sub a5, a2, a4
; CHECK-NEXT:  .LBB0_2: # %do.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a3)
; CHECK-NEXT:    add a3, a3, a4
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    bltu a3, a5, .LBB0_2
; CHECK-NEXT:  # %bb.3: # %do.end
; CHECK-NEXT:    sub a2, a2, a3
; CHECK-NEXT:    vsetvli a2, a2, e8, m8, ta, ma
; CHECK-NEXT:  .LBB0_4: # %if.end
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a3)
; CHECK-NEXT:    ret
entry:
  %0 = ptrtoint ptr %a0 to i64
  %1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
  %cmp.not = icmp eq i64 %1, %a2
  br i1 %cmp.not, label %if.end, label %if.then

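; Slow path: the granted VL (%1) is smaller than the requested count, so copy
; one full VL per iteration in %do.body and leave the tail to %if.end.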
if.then:                                          ; preds = %entry
  %add = add i64 %0, %a2
  %sub = sub i64 %add, %1
  br label %do.body

do.body:                                          ; preds = %do.body, %if.then
  %a3.0 = phi i64 [ %0, %if.then ], [ %add1, %do.body ]
  %a1.addr.0 = phi ptr [ %a1, %if.then ], [ %add.ptr, %do.body ]
  %2 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.0, i64 %1)
  %3 = inttoptr i64 %a3.0 to ptr
  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %2, ptr %3, i64 %1)
  %add1 = add i64 %a3.0, %1
  %add.ptr = getelementptr i8, ptr %a1.addr.0, i64 %1
  %cmp2 = icmp ugt i64 %sub, %add1
  br i1 %cmp2, label %do.body, label %do.end

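; Compute the number of bytes left after the loop and request a new VL for
; the tail copy.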
do.end:                                           ; preds = %do.body
  %sub4 = sub i64 %add, %add1
  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub4, i64 0, i64 3)
  br label %if.end

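; Join point: copy the final (or only) %t0.0 bytes and return the original
; destination pointer.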
if.end:                                           ; preds = %do.end, %entry
  %a3.1 = phi i64 [ %add1, %do.end ], [ %0, %entry ]
  %t0.0 = phi i64 [ %4, %do.end ], [ %a2, %entry ]
  %a1.addr.1 = phi ptr [ %add.ptr, %do.end ], [ %a1, %entry ]
  %5 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.1, i64 %t0.0)
  %6 = inttoptr i64 %a3.1 to ptr
  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %5, ptr %6, i64 %t0.0)
  ret ptr %a0
}

declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)
declare void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)