Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
Matthew Simpson df2ab917ad [SLP] Avoid signed integer overflow
The test case included with r279125 exposed an existing signed integer
overflow. Since getTreeCost can return INT_MAX, we can't sum this cost together
with other costs, such as getReductionCost.

This patch removes the possibility of assigning a cost of INT_MAX. Since we
were previously using INT_MAX as an indicator for "should not vectorize", we
now explicitly check this condition with "isTreeTinyAndNotFullyVectorizable"
before computing a cost.

This patch adds a run-line to the test case used for r279125 that ensures we
don't vectorize. Previously, this line would vectorize the test case by chance
due to undefined behavior in the cost calculation.

Differential Revision: https://reviews.llvm.org/D23723

llvm-svn: 279562
2016-08-23 20:48:50 +00:00

92 lines
5.2 KiB
LLVM

; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT
; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER
; RUN: opt < %s -slp-schedule-budget=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@a = common global [80 x i8] zeroinitializer, align 16
; DEFAULT-LABEL: @PR28330(
; DEFAULT: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]]
; DEFAULT: %[[R2:.+]] = shufflevector <8 x i32> %[[R1]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R3:.+]] = add <8 x i32> %[[R1]], %[[R2]]
; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
; DEFAULT: %tmp34 = add i32 %[[R6]], %tmp17
;
; GATHER-LABEL: @PR28330(
; GATHER: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
; GATHER: %tmp19 = select i1 %tmp1, i32 -720, i32 -80
; GATHER: %tmp21 = select i1 %tmp3, i32 -720, i32 -80
; GATHER: %tmp23 = select i1 %tmp5, i32 -720, i32 -80
; GATHER: %tmp25 = select i1 %tmp7, i32 -720, i32 -80
; GATHER: %tmp27 = select i1 %tmp9, i32 -720, i32 -80
; GATHER: %tmp29 = select i1 %tmp11, i32 -720, i32 -80
; GATHER: %tmp31 = select i1 %tmp13, i32 -720, i32 -80
; GATHER: %tmp33 = select i1 %tmp15, i32 -720, i32 -80
; GATHER: %[[I0:.+]] = insertelement <8 x i32> undef, i32 %tmp19, i32 0
; GATHER: %[[I1:.+]] = insertelement <8 x i32> %[[I0]], i32 %tmp21, i32 1
; GATHER: %[[I2:.+]] = insertelement <8 x i32> %[[I1]], i32 %tmp23, i32 2
; GATHER: %[[I3:.+]] = insertelement <8 x i32> %[[I2]], i32 %tmp25, i32 3
; GATHER: %[[I4:.+]] = insertelement <8 x i32> %[[I3]], i32 %tmp27, i32 4
; GATHER: %[[I5:.+]] = insertelement <8 x i32> %[[I4]], i32 %tmp29, i32 5
; GATHER: %[[I6:.+]] = insertelement <8 x i32> %[[I5]], i32 %tmp31, i32 6
; GATHER: %[[I7:.+]] = insertelement <8 x i32> %[[I6]], i32 %tmp33, i32 7
; GATHER: %[[R0:.+]] = shufflevector <8 x i32> %[[I7]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER: %[[R1:.+]] = add <8 x i32> %[[I7]], %[[R0]]
; GATHER: %[[R2:.+]] = shufflevector <8 x i32> %[[R1]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER: %[[R3:.+]] = add <8 x i32> %[[R1]], %[[R2]]
; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
; GATHER: %tmp34 = add i32 %[[R6]], %tmp17
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NOT: shufflevector
define void @PR28330(i32 %n) {
entry:
%tmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1
%tmp1 = icmp eq i8 %tmp0, 0
%tmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2
%tmp3 = icmp eq i8 %tmp2, 0
%tmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
%tmp5 = icmp eq i8 %tmp4, 0
%tmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
%tmp7 = icmp eq i8 %tmp6, 0
%tmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
%tmp9 = icmp eq i8 %tmp8, 0
%tmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
%tmp11 = icmp eq i8 %tmp10, 0
%tmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
%tmp13 = icmp eq i8 %tmp12, 0
%tmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
%tmp15 = icmp eq i8 %tmp14, 0
br label %for.body
for.body:
%tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
%tmp19 = select i1 %tmp1, i32 -720, i32 -80
%tmp20 = add i32 %tmp17, %tmp19
%tmp21 = select i1 %tmp3, i32 -720, i32 -80
%tmp22 = add i32 %tmp20, %tmp21
%tmp23 = select i1 %tmp5, i32 -720, i32 -80
%tmp24 = add i32 %tmp22, %tmp23
%tmp25 = select i1 %tmp7, i32 -720, i32 -80
%tmp26 = add i32 %tmp24, %tmp25
%tmp27 = select i1 %tmp9, i32 -720, i32 -80
%tmp28 = add i32 %tmp26, %tmp27
%tmp29 = select i1 %tmp11, i32 -720, i32 -80
%tmp30 = add i32 %tmp28, %tmp29
%tmp31 = select i1 %tmp13, i32 -720, i32 -80
%tmp32 = add i32 %tmp30, %tmp31
%tmp33 = select i1 %tmp15, i32 -720, i32 -80
%tmp34 = add i32 %tmp32, %tmp33
br label %for.body
}