Files
clang-p2996/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll
Luke Lau db782b44b3 [RISCV] Don't forward AVL in VSETVLIInfo if it would clobber other definitions (#97264)
This fixes a crash found when compiling OpenBLAS with -mllvm
-verify-machineinstrs.

When we "forward" the AVL from the output of a vsetvli, we might have to
extend the LiveInterval of the AVL to where we insert the new vsetvli.

Most of the time we are able to extend the LiveInterval because there's
only one val num (definition) for the register. But PHI elimination can
assign multiple values to the same register, in which case we end up
clobbering a different val num when extending:

    %x = PseudoVSETVLI %avl, ...
    %avl = ADDI ...
    %v = PseudoVADD ..., avl=%x
    ; %avl is forwarded to PseudoVADD:
    %x = PseudoVSETVLI %avl, ...
    %avl = ADDI ...
    %v = PseudoVADD ..., avl=%avl

Here there's no way to extend the %avl from the vsetvli since %avl is
redefined, i.e. we have two val nums.

This fixes it by only forwarding it when we have exactly one val num,
where it should be safe to extend it.
2024-07-05 11:44:59 +08:00

78 lines
3.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test driver: compiles this file with llc at -O3 with the RISC-V vector
; extension enabled (+v) and the -lsr-drop-solution option, then matches the
; emitted assembly against the assertion lines attached to @foo below.
; NOTE(review): -lsr-drop-solution is presumably the LoopStrengthReduce knob
; this test exercises (the file lives under LoopStrengthReduce/RISCV) - confirm.
; RUN: llc < %s -O3 -mattr=+v -lsr-drop-solution | FileCheck --check-prefix=CHECK %s
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux-gnu"
; @foo copies i8 elements from %a1 to %a0 with the RISC-V vle/vse intrinsics
; and returns %a0 unchanged.
;   %a0 - destination pointer (also the return value)
;   %a1 - source pointer
;   %a2 - requested length, passed to the vsetvli intrinsic
; Control flow: if the first vsetvli grants exactly %a2, a single load/store in
; if.end handles everything. Otherwise do.body copies in steps of the granted
; length %1, and do.end/if.end handle the remainder.
; The assertion block below is autogenerated; regenerate it with
; utils/update_llc_test_checks.py rather than editing it by hand.
define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a4, a2, e8, m8, ta, ma
; CHECK-NEXT: bne a4, a2, .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.then
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: .LBB0_3: # %do.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: add a3, a3, a4
; CHECK-NEXT: add a1, a1, a4
; CHECK-NEXT: bltu a3, a5, .LBB0_3
; CHECK-NEXT: # %bb.4: # %do.end
; CHECK-NEXT: sub a2, a2, a3
; CHECK-NEXT: vsetvli a2, a2, e8, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a3)
; CHECK-NEXT: ret
entry:
; The destination is tracked as an integer (%0) so the loop can do address
; arithmetic on it with plain i64 adds.
%0 = ptrtoint ptr %a0 to i64
; Request %a2 elements; the immediates (0, 3) show up as "e8, m8" in the
; expected assembly above. %1 is the length actually granted.
%1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
; Granted length equals the request: skip the loop and go straight to if.end.
%cmp.not = icmp eq i64 %1, %a2
br i1 %cmp.not, label %if.end, label %if.then
if.then: ; preds = %entry
; %add is the integer address one past the end of the destination range;
; %sub is that end address minus one granted length and serves as the loop bound.
%add = add i64 %0, %a2
%sub = sub i64 %add, %1
br label %do.body
do.body: ; preds = %do.body, %if.then
; %a3.0 is the current destination address (as i64), %a1.addr.0 the current
; source pointer; both advance by %1 per iteration.
%a3.0 = phi i64 [ %0, %if.then ], [ %add1, %do.body ]
%a1.addr.0 = phi ptr [ %a1, %if.then ], [ %add.ptr, %do.body ]
; Load from the source and store to the destination, both with length operand %1.
%2 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.0, i64 %1)
%3 = inttoptr i64 %a3.0 to ptr
tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %2, ptr %3, i64 %1)
%add1 = add i64 %a3.0, %1
%add.ptr = getelementptr i8, ptr %a1.addr.0, i64 %1
; Keep looping while the advanced destination address is still below %sub.
%cmp2 = icmp ugt i64 %sub, %add1
br i1 %cmp2, label %do.body, label %do.end
do.end: ; preds = %do.body
; %sub4 is the count left between the advanced destination address and the
; end address; a second vsetvli turns it into the length used in if.end.
%sub4 = sub i64 %add, %add1
%4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub4, i64 0, i64 3)
br label %if.end
if.end: ; preds = %do.end, %entry
; Merge point: coming from entry the phis select the original pointers and
; %a2 as the length; coming from do.end they select the advanced pointers and
; the second vsetvli result %4.
%a3.1 = phi i64 [ %add1, %do.end ], [ %0, %entry ]
%t0.0 = phi i64 [ %4, %do.end ], [ %a2, %entry ]
%a1.addr.1 = phi ptr [ %add.ptr, %do.end ], [ %a1, %entry ]
; Final load/store using the merged length %t0.0.
%5 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.1, i64 %t0.0)
%6 = inttoptr i64 %a3.1 to ptr
tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %5, ptr %6, i64 %t0.0)
ret ptr %a0
}
; Declarations of the RISC-V vector intrinsics called by @foo:
;   vsetvli - takes an i64 plus two immediate arguments and returns an i64
;   vle     - returns a <vscale x 64 x i8> loaded through the pointer; the
;             first operand is a vector of the same type and the last an i64
;   vse     - stores a <vscale x 64 x i8> through the pointer; the last
;             operand is an i64
declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)
declare void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)