We use two approaches for determining the minimum bitwidth.
* Demanded bits
* Value tracking
If the demanded-bits analysis doesn't result in a narrower type, we then try
value tracking. We need this fallback if we want to root SLP trees with the
indices of getelementptr instructions, since all the bits of the indices are
demanded.
There is a missing piece, though. We need to be able to distinguish "demanded
and shrinkable" from "demanded and not shrinkable". For example, the bits of %i
in
%i = sext i32 %e1 to i64
%gep = getelementptr inbounds i64, i64* %p, i64 %i
are demanded, but we can shrink %i's type to i32 because it won't change the
result of the getelementptr. On the other hand, in
%tmp15 = sext i32 %tmp14 to i64
%tmp16 = insertvalue { i64, i64 } undef, i64 %tmp15, 0
it doesn't make sense to shrink %tmp15, so we can skip the value tracking.
Ideas are from Matthew Simpson!
Differential Revision: https://reviews.llvm.org/D44868
llvm-svn: 329035
81 lines
4.3 KiB
LLVM
81 lines
4.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s

; Function Attrs: nounwind uwtable
;
; Input: four scalar chains, one per getelementptr index. Each chain is
;   icmp sgt -> select -> icmp slt -> select -> sext i32 to i64 -> getelementptr
; Lane 0 compares and selects %shr15; lanes 1-3 compare %sub14 against the
; constants -1, -5 and -9 and select between undef and 0.
;
; Expected output (assertions below): the compare/select chain is emitted as
; <4 x i32> vector operations. The vector result is sign-extended to
; <4 x i64> and truncated back to <4 x i32>; each lane is then extracted as
; i32 and sign-extended to i64 individually right before its getelementptr.
;
; NOTE(review): attribute group #0 is referenced on the define line, but its
; definition is not visible in this excerpt -- confirm it exists (or is
; intentionally omitted) in the full file.
define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
; CHECK-LABEL: @get_block(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LAND_LHS_TRUE:%.*]]
; CHECK: land.lhs.true:
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: unreachable
; CHECK: if.end:
; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef
; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> undef, i32 [[SHR15]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB14]], i32 1
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], <i32 0, i32 -1, i32 -5, i32 -9>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[SHR15]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 undef, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef
; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP10]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP11]], i32 2
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3
; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP19]]
; CHECK-NEXT: unreachable
;
entry:
  br label %land.lhs.true

land.lhs.true:                                    ; preds = %entry
  br i1 undef, label %if.then, label %if.end

if.then:                                          ; preds = %land.lhs.true
  unreachable

if.end:                                           ; preds = %land.lhs.true
  ; Values shared by all four lanes.
  %sub14 = sub nsw i32 %y_pos, undef
  %shr15 = ashr i32 %sub14, 2

  ; Lane 0: compares and selects %shr15 against 0.
  %cmp.i.i = icmp sgt i32 %shr15, 0
  %cond.i.i = select i1 %cmp.i.i, i32 %shr15, i32 0
  %cmp.i4.i = icmp slt i32 %cond.i.i, undef
  %cond.i5.i = select i1 %cmp.i4.i, i32 %cond.i.i, i32 undef
  %idxprom30 = sext i32 %cond.i5.i to i64
  %arrayidx31 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30

  ; Lane 1: compares %sub14 against -1.
  %cmp.i.i.1 = icmp sgt i32 %sub14, -1
  %cond.i.i.1 = select i1 %cmp.i.i.1, i32 undef, i32 0
  %cmp.i4.i.1 = icmp slt i32 %cond.i.i.1, undef
  %cond.i5.i.1 = select i1 %cmp.i4.i.1, i32 %cond.i.i.1, i32 undef
  %idxprom30.1 = sext i32 %cond.i5.i.1 to i64
  %arrayidx31.1 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30.1

  ; Lane 2: compares %sub14 against -5.
  %cmp.i.i.2 = icmp sgt i32 %sub14, -5
  %cond.i.i.2 = select i1 %cmp.i.i.2, i32 undef, i32 0
  %cmp.i4.i.2 = icmp slt i32 %cond.i.i.2, undef
  %cond.i5.i.2 = select i1 %cmp.i4.i.2, i32 %cond.i.i.2, i32 undef
  %idxprom30.2 = sext i32 %cond.i5.i.2 to i64
  %arrayidx31.2 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30.2

  ; Lane 3: compares %sub14 against -9.
  %cmp.i.i.3 = icmp sgt i32 %sub14, -9
  %cond.i.i.3 = select i1 %cmp.i.i.3, i32 undef, i32 0
  %cmp.i4.i.3 = icmp slt i32 %cond.i.i.3, undef
  %cond.i5.i.3 = select i1 %cmp.i4.i.3, i32 %cond.i.i.3, i32 undef
  %idxprom30.3 = sext i32 %cond.i5.i.3 to i64
  %arrayidx31.3 = getelementptr inbounds i16*, i16** undef, i64 %idxprom30.3
  unreachable
}
|