The comments and the checks in
test/Transforms/SLPVectorizer/X86/schedule_budget.ll
did not match. After commit 352c46e707, vectorization also
happened with the reduced budget.
This patch is intended to restore the original intent of the
test case (the one described in the comments). We want to see
that a restricted budget may reduce the amount of vectorization (i.e.
verifying that the -slp-schedule-budget option makes a difference),
while a higher budget still results in vectorization.
Differential Revision: https://reviews.llvm.org/D152530
178 lines
7.0 KiB
LLVM
178 lines
7.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix LOBUDGET %s
|
|
; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=32 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix HIBUDGET %s
|
|
|
|
; Module-level target settings for this test.
; NOTE(review): both RUN lines pass -mtriple=x86_64-apple-macosx10.8.0, which
; differs from the 10.9.0 triple declared below -- presumably harmless here,
; but confirm whether the mismatch is intentional.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-apple-macosx10.9.0"
|
|
|
|
; Test if the budget for the scheduling region size works.
|
|
; We test with a reduced budget of 16, which should prevent vectorizing the loads, and with a budget of 32, which should not.
|
|
|
|
; External function with no body in this file. @test calls it repeatedly as
; filler between its loads and stores to enlarge the scheduling region.
declare void @unknown()
|
|
|
|
; @test contains three groups of scalar code in a single basic block:
;   1. four consecutive float loads from %a, each added to itself;
;   2. four stores of those sums to consecutive slots of %b, separated from
;      group 1 by 28 calls to @unknown;
;   3. a four-element float copy from %c to %d.
; The checks for the budget-16 run expect groups 1 and 2 to stay scalar and
; only group 3 to become <4 x float>; the checks for the budget-32 run expect
; all three groups to be vectorized.
define void @test(ptr %a, ptr %b, ptr %c, ptr %d) {
|
|
; LOBUDGET-LABEL: @test(
|
|
; LOBUDGET-NEXT: entry:
|
|
; LOBUDGET-NEXT: [[L0:%.*]] = load float, ptr [[A:%.*]], align 4
|
|
; LOBUDGET-NEXT: [[A1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
|
|
; LOBUDGET-NEXT: [[L1:%.*]] = load float, ptr [[A1]], align 4
|
|
; LOBUDGET-NEXT: [[A2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 2
|
|
; LOBUDGET-NEXT: [[L2:%.*]] = load float, ptr [[A2]], align 4
|
|
; LOBUDGET-NEXT: [[A3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3
|
|
; LOBUDGET-NEXT: [[L3:%.*]] = load float, ptr [[A3]], align 4
|
|
; LOBUDGET-NEXT: [[L00:%.*]] = fadd float [[L0]], [[L0]]
|
|
; LOBUDGET-NEXT: [[L10:%.*]] = fadd float [[L1]], [[L1]]
|
|
; LOBUDGET-NEXT: [[L20:%.*]] = fadd float [[L2]], [[L2]]
|
|
; LOBUDGET-NEXT: [[L30:%.*]] = fadd float [[L3]], [[L3]]
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: call void @unknown()
|
|
; LOBUDGET-NEXT: store float [[L00]], ptr [[B:%.*]], align 4
|
|
; LOBUDGET-NEXT: [[B1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1
|
|
; LOBUDGET-NEXT: store float [[L10]], ptr [[B1]], align 4
|
|
; LOBUDGET-NEXT: [[B2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
|
|
; LOBUDGET-NEXT: store float [[L20]], ptr [[B2]], align 4
|
|
; LOBUDGET-NEXT: [[B3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 3
|
|
; LOBUDGET-NEXT: store float [[L30]], ptr [[B3]], align 4
|
|
; LOBUDGET-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
|
|
; LOBUDGET-NEXT: store <4 x float> [[TMP0]], ptr [[D:%.*]], align 4
|
|
; LOBUDGET-NEXT: ret void
|
|
;
|
|
; HIBUDGET-LABEL: @test(
|
|
; HIBUDGET-NEXT: entry:
|
|
; HIBUDGET-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
|
|
; HIBUDGET-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], [[TMP0]]
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: call void @unknown()
|
|
; HIBUDGET-NEXT: store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4
|
|
; HIBUDGET-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
|
|
; HIBUDGET-NEXT: store <4 x float> [[TMP2]], ptr [[D:%.*]], align 4
|
|
; HIBUDGET-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Don't vectorize these loads (with the reduced budget).
; Four consecutive floats are read from %a (elements 0..3) and each value is
; then added to itself.
|
|
%l0 = load float, ptr %a
|
|
%a1 = getelementptr inbounds float, ptr %a, i64 1
|
|
%l1 = load float, ptr %a1
|
|
%a2 = getelementptr inbounds float, ptr %a, i64 2
|
|
%l2 = load float, ptr %a2
|
|
%a3 = getelementptr inbounds float, ptr %a, i64 3
|
|
%l3 = load float, ptr %a3
|
|
%l00 = fadd float %l0, %l0
|
|
%l10 = fadd float %l1, %l1
|
|
%l20 = fadd float %l2, %l2
|
|
%l30 = fadd float %l3, %l3
|
|
|
|
; Some unrelated instructions in between to enlarge the scheduling region
; (28 calls to @unknown, which sit between the fadds above and the stores below).
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
call void @unknown()
|
|
|
|
; Don't vectorize these stores because their operands are too far away (with
|
|
; the reduced budget).
; The four sums are written to consecutive elements 0..3 of %b.
|
|
store float %l00, ptr %b
|
|
%b1 = getelementptr inbounds float, ptr %b, i64 1
|
|
store float %l10, ptr %b1
|
|
%b2 = getelementptr inbounds float, ptr %b, i64 2
|
|
store float %l20, ptr %b2
|
|
%b3 = getelementptr inbounds float, ptr %b, i64 3
|
|
store float %l30, ptr %b3
|
|
|
|
; But still vectorize the following instructions, because even if the budget
|
|
; is exceeded there is a minimum region size.
; Both sets of checks expect this copy to become one <4 x float> load from %c
; and one <4 x float> store to %d.
|
|
%l4 = load float, ptr %c
|
|
%c1 = getelementptr inbounds float, ptr %c, i64 1
|
|
%l5 = load float, ptr %c1
|
|
%c2 = getelementptr inbounds float, ptr %c, i64 2
|
|
%l6 = load float, ptr %c2
|
|
%c3 = getelementptr inbounds float, ptr %c, i64 3
|
|
%l7 = load float, ptr %c3
|
|
|
|
store float %l4, ptr %d
|
|
%d1 = getelementptr inbounds float, ptr %d, i64 1
|
|
store float %l5, ptr %d1
|
|
%d2 = getelementptr inbounds float, ptr %d, i64 2
|
|
store float %l6, ptr %d2
|
|
%d3 = getelementptr inbounds float, ptr %d, i64 3
|
|
store float %l7, ptr %d3
|
|
|
|
ret void
|
|
}
|
|
|