Add a version of calculateRegisterUsage that estimates register usage for a VPlan. This mostly just ports the existing code, with some updates to figure out which recipes will generate vectors vs. scalars. There are a number of changes in the computed register usages, but they should be more accurate w.r.t. the generated vector code. The changes are as follows: * Scalar usage increases in most cases by 1, as we always create a scalar canonical IV, which is alive across the loop and is not considered by the legacy implementation. * Output is ordered by insertion; scalar registers are now added first due to the canonical IV phi. * Using the VPlan, we now also know more precisely whether an induction will be vectorized or scalarized. Depends on https://github.com/llvm/llvm-project/pull/126415 PR: https://github.com/llvm/llvm-project/pull/126437
113 lines
5.8 KiB
LLVM
113 lines
5.8 KiB
LLVM
; Exercises the loop vectorizer's register-usage debug report (the "LV(REG)"
; lines) for riscv64 with +v,+d in five configurations: once with the vector
; width forced to 1, and once for each -riscv-v-register-bit-width-lmul value
; of 1, 2, 4 and 8.
;
; Every FileCheck invocation enables the plain CHECK prefix in addition to its
; configuration-specific prefix; without it the per-function label directives
; further down are silently ignored.
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN:   -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
; RUN:   -riscv-v-vector-bits-min=128 -force-vector-width=1 \
; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SCALAR
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN:   -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=1 \
; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LMUL1
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN:   -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 \
; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LMUL2
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN:   -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=4 \
; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LMUL4
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN:   -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \
; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LMUL8

; Element-wise float addition: result[i] = src1[i] + src2[i] for every i in
; [0, zext(%size)). The entry block skips the loop entirely when %size is 0.
; The directives inside the function pin the register-usage report expected
; from each configuration (scalar, and LMUL 1/2/4/8).
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
; CHECK-SCALAR: LV(REG): Found max usage: 2 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

entry:
  %conv = zext i32 %size to i64
  %cmp10.not = icmp eq i32 %size, 0
  br i1 %cmp10.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void

for.body:
  %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %src1, i64 %i.011
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %src2, i64 %i.011
  %1 = load float, ptr %arrayidx2, align 4
  %add = fadd float %0, %1
  %arrayidx3 = getelementptr inbounds float, ptr %result, i64 %i.011
  store float %add, ptr %arrayidx3, align 4
  %add4 = add nuw nsw i64 %i.011, 1
  %exitcond.not = icmp eq i64 %add4, %conv
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; For every i in [0, zext(%n)): loads the pointer stored at a[i], advances it
; by one i32 element, and stores it back to a[i]. The loop is entered only
; when %n is greater than 0 (signed compare).
; The directives inside the function pin the register-usage report expected
; from each configuration (scalar, and LMUL 1/2/4/8).
define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-LABEL: goo
; CHECK-SCALAR: LV(REG): Found max usage: 1 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-LMUL1: LV(REG): Found max usage: 2 item
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
entry:
  %cmp3 = icmp sgt i32 %n, 0
  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
  %0 = load ptr, ptr %arrayidx, align 8
  %add.ptr = getelementptr inbounds i32, ptr %0, i64 1
  store ptr %add.ptr, ptr %arrayidx, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}