This fixes a crash found when compiling OpenBLAS with -mllvm
-verify-machineinstrs.
When we "forward" the AVL from the output of a vsetvli, we might have to
extend the LiveInterval of the AVL to where we insert the new vsetvli.
Most of the time we are able to extend the LiveInterval because there's
only one val num (definition) for the register. But PHI elimination can
assign multiple values to the same register, in which case we end up
clobbering a different val num when extending:
%x = PseudoVSETVLI %avl, ...
%avl = ADDI ...
%v = PseudoVADD ..., avl=%x
; %avl is forwarded to PseudoVADD:
%x = PseudoVSETVLI %avl, ...
%avl = ADDI ...
%v = PseudoVADD ..., avl=%avl
Here there's no way to extend the live range of %avl from the vsetvli down to
the PseudoVADD, since %avl is redefined in between, i.e. we have two val nums.
This fixes it by only forwarding it when we have exactly one val num,
where it should be safe to extend it.
78 lines
3.1 KiB
LLVM
78 lines
3.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -mattr=+v -lsr-drop-solution | FileCheck --check-prefix=CHECK %s

; The module targets 64-bit RISC-V Linux; the RUN line enables the vector
; extension (+v) and compiles at -O3.
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux-gnu"

; @foo copies bytes from %a1 to %a0 with vle/vse, using the VL returned by
; vsetvli. If the first vsetvli does not return %a2, a loop (%do.body) copies
; %1 bytes per iteration, and %do.end issues a second vsetvli on the remainder.
;
; The VL operand of the final vle/vse (%t0.0) is a phi of %a2 and the result
; of the vsetvli in %do.end (%4). The checks for %do.end keep `vsetvli a2, a2`
; followed by `vsetvli zero, a2`, i.e. the second vsetvli consumes the VL output
; of the first.
; NOTE(review): this appears to be the "more than one val num" case from the
; change description, where the AVL must not be forwarded -- confirm.
define ptr @foo(ptr %a0, ptr %a1, i64 %a2) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli a4, a2, e8, m8, ta, ma
; CHECK-NEXT:    bne a4, a2, .LBB0_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_2: # %if.then
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    sub a5, a2, a4
; CHECK-NEXT:    mv a3, a0
; CHECK-NEXT:  .LBB0_3: # %do.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a3)
; CHECK-NEXT:    add a3, a3, a4
; CHECK-NEXT:    add a1, a1, a4
; CHECK-NEXT:    bltu a3, a5, .LBB0_3
; CHECK-NEXT:  # %bb.4: # %do.end
; CHECK-NEXT:    sub a2, a2, a3
; CHECK-NEXT:    vsetvli a2, a2, e8, m8, ta, ma
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vse8.v v8, (a3)
; CHECK-NEXT:    ret
entry:
  %0 = ptrtoint ptr %a0 to i64
  %1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3)
  %cmp.not = icmp eq i64 %1, %a2
  br i1 %cmp.not, label %if.end, label %if.then

if.then:                                          ; preds = %entry
  %add = add i64 %0, %a2
  %sub = sub i64 %add, %1
  br label %do.body

do.body:                                          ; preds = %do.body, %if.then
  %a3.0 = phi i64 [ %0, %if.then ], [ %add1, %do.body ]
  %a1.addr.0 = phi ptr [ %a1, %if.then ], [ %add.ptr, %do.body ]
  %2 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.0, i64 %1)
  %3 = inttoptr i64 %a3.0 to ptr
  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %2, ptr %3, i64 %1)
  %add1 = add i64 %a3.0, %1
  %add.ptr = getelementptr i8, ptr %a1.addr.0, i64 %1
  %cmp2 = icmp ugt i64 %sub, %add1
  br i1 %cmp2, label %do.body, label %do.end

do.end:                                           ; preds = %do.body
  %sub4 = sub i64 %add, %add1
  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %sub4, i64 0, i64 3)
  br label %if.end

if.end:                                           ; preds = %do.end, %entry
  %a3.1 = phi i64 [ %add1, %do.end ], [ %0, %entry ]
  %t0.0 = phi i64 [ %4, %do.end ], [ %a2, %entry ]
  %a1.addr.1 = phi ptr [ %add.ptr, %do.end ], [ %a1, %entry ]
  %5 = tail call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, ptr %a1.addr.1, i64 %t0.0)
  %6 = inttoptr i64 %a3.1 to ptr
  tail call void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8> %5, ptr %6, i64 %t0.0)
  ret ptr %a0
}

; Declarations of the RISC-V vector intrinsics called by @foo.
declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)

declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)

declare void @llvm.riscv.vse.nxv64i8.i64(<vscale x 64 x i8>, ptr nocapture, i64)