SLP includes an analysis of the minimum bitwidth at which the actual integer operations can be emitted. This allows reducing register pressure and improving performance. Currently, the analysis is used only by the cost model, and the subsequent transformation relies on InstructionCombiner. It is better to perform the transformation directly in SLP, which reduces compile time and fixes cost model issues.
78 lines
4.1 KiB
LLVM
78 lines
4.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s | FileCheck %s

; Scalar input for the SLP vectorizer: four copies of the same
; icmp sgt / select / icmp slt / select / sext / getelementptr chain.
;   lane 0   : tests %shr15 against 0 and keeps %shr15 when the test holds
;   lanes 1-3: test %sub14 against -1, -5 and -9 and keep undef instead
; The assertions below expect the four compare/select chains to be merged into
; <4 x i32> operations, with one extractelement + sext + getelementptr per lane
; left scalar. None of the computed addresses is used; the block ends in
; unreachable.
; Function Attrs: nounwind uwtable
define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
; CHECK-LABEL: @get_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LAND_LHS_TRUE:%.*]]
; CHECK:       land.lhs.true:
; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    unreachable
; CHECK:       if.end:
; CHECK-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef
; CHECK-NEXT:    [[SHR15:%.*]] = ashr i32 [[SUB14]], 2
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[TMP2]], <i32 0, i32 -1, i32 -5, i32 -9>
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 undef, i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 3
; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef
; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0
; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP11]]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1
; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP13]]
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP15]]
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3
; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP17]]
; CHECK-NEXT:    unreachable
;
entry:
  br label %land.lhs.true

land.lhs.true:                                    ; preds = %entry
  br i1 undef, label %if.then, label %if.end

if.then:                                          ; preds = %land.lhs.true
  unreachable

if.end:                                           ; preds = %land.lhs.true
  ; Values shared by all four lanes.
  %sub14 = sub nsw i32 %y_pos, undef
  %shr15 = ashr i32 %sub14, 2

  ; Lane 0: the only lane that compares and selects the shifted value %shr15.
  %cmp.i.i = icmp sgt i32 %shr15, 0
  %cond.i.i = select i1 %cmp.i.i, i32 %shr15, i32 0
  %cmp.i4.i = icmp slt i32 %cond.i.i, undef
  %cond.i5.i = select i1 %cmp.i4.i, i32 %cond.i.i, i32 undef
  %idxprom30 = sext i32 %cond.i5.i to i64
  %arrayidx31 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30

  ; Lane 1: compares %sub14 (not %shr15) and selects between undef and 0.
  %cmp.i.i.1 = icmp sgt i32 %sub14, -1
  %cond.i.i.1 = select i1 %cmp.i.i.1, i32 undef, i32 0
  %cmp.i4.i.1 = icmp slt i32 %cond.i.i.1, undef
  %cond.i5.i.1 = select i1 %cmp.i4.i.1, i32 %cond.i.i.1, i32 undef
  %idxprom30.1 = sext i32 %cond.i5.i.1 to i64
  %arrayidx31.1 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30.1

  ; Lane 2: same shape as lane 1, compared against -5.
  %cmp.i.i.2 = icmp sgt i32 %sub14, -5
  %cond.i.i.2 = select i1 %cmp.i.i.2, i32 undef, i32 0
  %cmp.i4.i.2 = icmp slt i32 %cond.i.i.2, undef
  %cond.i5.i.2 = select i1 %cmp.i4.i.2, i32 %cond.i.i.2, i32 undef
  %idxprom30.2 = sext i32 %cond.i5.i.2 to i64
  %arrayidx31.2 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30.2

  ; Lane 3: same shape as lane 1, compared against -9.
  %cmp.i.i.3 = icmp sgt i32 %sub14, -9
  %cond.i.i.3 = select i1 %cmp.i.i.3, i32 undef, i32 0
  %cmp.i4.i.3 = icmp slt i32 %cond.i.i.3, undef
  %cond.i5.i.3 = select i1 %cmp.i4.i.3, i32 %cond.i.i.3, i32 undef
  %idxprom30.3 = sext i32 %cond.i5.i.3 to i64
  %arrayidx31.3 = getelementptr inbounds ptr, ptr undef, i64 %idxprom30.3
  unreachable
}
|