Add a version of calculateRegisterUsage that estimates register usage for a VPlan. This mostly just ports the existing code, with some updates to figure out which recipes will generate vectors vs. scalars. There are a number of changes in the computed register usages, but they should be more accurate w.r.t. the generated vector code. There are the following changes: * Scalar usage increases in most cases by 1, as we always create a scalar canonical IV, which is alive across the loop and is not considered by the legacy implementation * Output is ordered by insertion; scalar registers are now added first due to the canonical IV phi. * Using the VPlan, we now also know more precisely whether an induction will be vectorized or scalarized. Depends on https://github.com/llvm/llvm-project/pull/126415 PR: https://github.com/llvm/llvm-project/pull/126437
47 lines
2.3 KiB
LLVM
47 lines
2.3 KiB
LLVM
; REQUIRES: asserts
|
|
; RUN: opt --passes=loop-vectorize --mtriple loongarch64-linux-gnu \
|
|
; RUN: --mattr=+lsx -debug-only=loop-vectorize --force-vector-width=1 \
|
|
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
|
|
; RUN: opt --passes=loop-vectorize --mtriple loongarch64-linux-gnu \
|
|
; RUN: --mattr=+lsx -debug-only=loop-vectorize --force-vector-width=4 \
|
|
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-VECTOR
|
|
|
|
; Test input for the loop vectorizer's register-usage debug output.
; @bar stores float(i) into A[i] for every i in [0, n) when n > 0.
; The directives below describe the expected "LV(REG)" debug lines for the
; two RUN configurations at the top of the file (forced vector width 1 and 4).
define void @bar(ptr %A, i32 signext %n) {
|
|
; NOTE(review): both RUN lines select only the CHECK-SCALAR / CHECK-VECTOR
; prefixes, so the plain label directive below looks inactive - confirm.
; CHECK-LABEL: bar
|
|
; Expected register-usage report for the forced-width-1 (scalar) run.
; CHECK-SCALAR: LV(REG): Found max usage: 2 item
|
|
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 3 registers
|
|
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: LoongArch::FPRRC, 1 registers
|
|
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
|
|
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 1 registers
|
|
; CHECK-SCALAR-NEXT: LV: The target has 30 registers of LoongArch::GPRRC register class
|
|
; CHECK-SCALAR-NEXT: LV: The target has 32 registers of LoongArch::FPRRC register class
|
|
; Expected register-usage report for the forced-width-4 (vector) run.
; CHECK-VECTOR: LV(REG): Found max usage: 2 item
|
|
; CHECK-VECTOR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 2 registers
|
|
; CHECK-VECTOR-NEXT: LV(REG): RegisterClass: LoongArch::VRRC, 2 registers
|
|
; CHECK-VECTOR-NEXT: LV(REG): Found invariant usage: 1 item
|
|
; CHECK-VECTOR-NEXT: LV(REG): RegisterClass: LoongArch::GPRRC, 1 registers
|
|
; CHECK-VECTOR-NEXT: LV: The target has 30 registers of LoongArch::GPRRC register class
|
|
; CHECK-VECTOR-NEXT: LV: The target has 32 registers of LoongArch::VRRC register class
|
|
|
|
; Guard: skip the loop entirely unless n > 0 (signed compare).
entry:
|
|
%cmp4 = icmp sgt i32 %n, 0
|
|
br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
; Widen n to i64 once, outside the loop, to use as the trip count.
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext nneg i32 %n to i64
|
|
br label %for.body
|
|
|
|
; Common exit, reached from the guard or when the loop finishes.
for.cond.cleanup: ; preds = %for.body, %entry
|
|
ret void
|
|
|
|
; Loop body: A[iv] = (float)(i32)iv, with iv counting up from 0 by 1.
for.body: ; preds = %for.body.preheader, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
|
; Narrow the 64-bit induction variable to i32 before converting to float.
%0 = trunc i64 %indvars.iv to i32
|
|
%conv = sitofp i32 %0 to float
|
|
%arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
|
|
store float %conv, ptr %arrayidx, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
; Leave the loop once the incremented index equals the widened trip count.
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
}
|