Before this patch, pack instructions could be emitted in between PHI nodes. This patch fixes that by correcting the insertion point of the new packs.
300 lines
12 KiB
LLVM
300 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s

; Each function below is run through the sandbox vectorizer's bottom-up-vec
; pass; the autogenerated assertions pin the expected output IR.

; Simplest case: two scalar float loads from %ptr[0] and %ptr[1] feed two
; scalar float stores back to the same addresses. The expected output is a
; single <2 x float> load followed by a single <2 x float> store.
define void @store_load(ptr %ptr) {
; CHECK-LABEL: define void @store_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}

; Cast case: each loaded float is widened with fpext and stored as a double.
; The expected output is a <2 x float> load, one vector fpext to <2 x double>
; and one <2 x double> store.
define void @store_fpext_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fpext_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTRD0:%.*]] = getelementptr double, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VCAST:%.*]] = fpext <2 x float> [[VECL]] to <2 x double>
; CHECK-NEXT: store <2 x double> [[VCAST]], ptr [[PTRD0]], align 8
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ptrd0 = getelementptr double, ptr %ptr, i32 0
  %ptrd1 = getelementptr double, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %fpext0 = fpext float %ld0 to double
  %fpext1 = fpext float %ld1 to double
  store double %fpext0, ptr %ptrd0
  store double %fpext1, ptr %ptrd1
  ret void
}

; Compare + cast case: two pairs of float loads (both pairs read %ptr[0] and
; %ptr[1]) are compared lane-wise with fcmp ogt, the i1 results are
; zero-extended to i32 and stored. The expected output is two <2 x float>
; loads, a vector fcmp, a vector zext and one <2 x i32> store.
define void @store_fcmp_zext_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fcmp_zext_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTRB0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VCMP:%.*]] = fcmp ogt <2 x float> [[VECL]], [[VECL1]]
; CHECK-NEXT: [[VCAST:%.*]] = zext <2 x i1> [[VCMP]] to <2 x i32>
; CHECK-NEXT: store <2 x i32> [[VCAST]], ptr [[PTRB0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ptrb0 = getelementptr i32, ptr %ptr, i32 0
  %ptrb1 = getelementptr i32, ptr %ptr, i32 1
  %ldB0 = load float, ptr %ptr0
  %ldB1 = load float, ptr %ptr1
  %ldA0 = load float, ptr %ptr0
  %ldA1 = load float, ptr %ptr1
  %fcmp0 = fcmp ogt float %ldA0, %ldB0
  %fcmp1 = fcmp ogt float %ldA1, %ldB1
  %zext0 = zext i1 %fcmp0 to i32
  %zext1 = zext i1 %fcmp1 to i32
  store i32 %zext0, ptr %ptrb0
  store i32 %zext1, ptr %ptrb1
  ret void
}

; Binary-operator case: two pairs of float loads are added lane-wise with fadd
; and the sums are stored. The expected output is two <2 x float> loads, one
; vector fadd and one <2 x float> store.
define void @store_fadd_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fadd_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ldA0 = load float, ptr %ptr0
  %ldA1 = load float, ptr %ptr1
  %ldB0 = load float, ptr %ptr0
  %ldB1 = load float, ptr %ptr1
  %fadd0 = fadd float %ldA0, %ldB0
  %fadd1 = fadd float %ldA1, %ldB1
  store float %fadd0, ptr %ptr0
  store float %fadd1, ptr %ptr1
  ret void
}

; Unary-operator case: each loaded float is negated with fneg and stored back.
; The expected output is a <2 x float> load, one vector fneg and one
; <2 x float> store.
define void @store_fneg_load(ptr %ptr) {
; CHECK-LABEL: define void @store_fneg_load(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VEC:%.*]] = fneg <2 x float> [[VECL]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %fneg0 = fneg float %ld0
  %fneg1 = fneg float %ld1
  store float %fneg0, ptr %ptr0
  store float %fneg1, ptr %ptr1
  ret void
}

; The scalar loads have users besides the stores being vectorized: %ld0 is
; returned and %ld1 feeds %user. The expected output therefore keeps both
; scalar loads (and their getelementptrs) next to the new <2 x float>
; load/store instead of erasing them.
define float @scalars_with_external_uses_not_dead(ptr %ptr) {
; CHECK-LABEL: define float @scalars_with_external_uses_not_dead(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
; CHECK-NEXT: [[USER:%.*]] = fneg float [[LD1]]
; CHECK-NEXT: ret float [[LD0]]
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  %user = fneg float %ld1
  ret float %ld0
}

; The stores go to %ptr[0] and %ptr[1], but the stored values are loaded from
; two different base pointers (%ptr and %ptr2). The expected output keeps the
; scalar loads, builds the <2 x float> operand with two insertelement
; instructions, and emits a single vector store.
define void @pack_scalars(ptr %ptr, ptr %ptr2) {
; CHECK-LABEL: define void @pack_scalars(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LD0]], i32 0
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LD1]], i32 1
; CHECK-NEXT: store <2 x float> [[PACK1]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr2
  store float %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}

; External function declared without a body or attributes; called by
; @cant_vectorize_seeds between its two stores.
declare void @foo()

; Negative test: a call to @foo sits between the two stores, and the expected
; output is the input IR unchanged (all scalar).
; NOTE(review): both stores write %ld1 to %ptr1 and %ld0 is never used, so the
; two stores are not adjacent addresses - confirm this is the intended input
; and that the call is really what prevents vectorization here.
define void @cant_vectorize_seeds(ptr %ptr) {
; CHECK-LABEL: define void @cant_vectorize_seeds(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  store float %ld1, ptr %ptr1
  call void @foo() ; This call blocks scheduling of the store seeds.
  store float %ld1, ptr %ptr1
  ret void
}

; Mixed vector/scalar seeds: a <2 x float> store to %ptr[0..1] is followed by
; a scalar float store to %ptr[2], with the scalar value loaded from the
; unrelated pointer %ptr2. The expected output unpacks the <2 x float> load
; with extractelement, packs all three lanes with insertelement and emits one
; <3 x float> store.
; NOTE(review): the 3-lane result presumably depends on the
; -sbvec-allow-non-pow2 option in the opt invocation at the top of the file -
; confirm.
define void @pack_vectors(ptr %ptr, ptr %ptr2) {
; CHECK-LABEL: define void @pack_vectors(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTR2:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr <2 x float>, ptr [[PTR]], i32 0
; CHECK-NEXT: [[LD0:%.*]] = load <2 x float>, ptr [[PTR0]], align 8
; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR2]], align 4
; CHECK-NEXT: [[VPACK:%.*]] = extractelement <2 x float> [[LD0]], i32 0
; CHECK-NEXT: [[VPACK1:%.*]] = insertelement <3 x float> poison, float [[VPACK]], i32 0
; CHECK-NEXT: [[VPACK2:%.*]] = extractelement <2 x float> [[LD0]], i32 1
; CHECK-NEXT: [[VPACK3:%.*]] = insertelement <3 x float> [[VPACK1]], float [[VPACK2]], i32 1
; CHECK-NEXT: [[PACK:%.*]] = insertelement <3 x float> [[VPACK3]], float [[LD1]], i32 2
; CHECK-NEXT: store <3 x float> [[PACK]], ptr [[PTR0]], align 8
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr <2 x float>, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 2
  %ld0 = load <2 x float>, ptr %ptr0
  %ld1 = load float, ptr %ptr2
  store <2 x float> %ld0, ptr %ptr0
  store float %ld1, ptr %ptr1
  ret void
}

; Diamond-shaped use graph: each fsub uses the same loaded value for both of
; its operands. The expected output has a single <2 x float> load that is
; used as both operands of the vector fsub.
define void @diamond(ptr %ptr) {
; CHECK-LABEL: define void @diamond(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VECL]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %sub0 = fsub float %ld0, %ld0
  %sub1 = fsub float %ld1, %ld1
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}

; Diamond where the second fsub operand is the same pair of loads in swapped
; lane order (%ld1, %ld0). The expected output reuses the one <2 x float> load
; and produces the swapped operand with a shufflevector using mask <1, 0>.
define void @diamondWithShuffle(ptr %ptr) {
; CHECK-LABEL: define void @diamondWithShuffle(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VSHUF:%.*]] = shufflevector <2 x float> [[VECL]], <2 x float> [[VECL]], <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VSHUF]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1
  %sub0 = fsub float %ld0, %ld1
  %sub1 = fsub float %ld1, %ld0
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}

; Diamond where the second fsub operand mixes two sources: lane 0 is %ldX
; (loaded from the separate pointer %ptrX) and lane 1 is %ld0. The expected
; output builds that operand with insertelement of the scalar %ldX plus an
; extractelement of lane 0 from the <2 x float> load.
define void @diamondMultiInput(ptr %ptr, ptr %ptrX) {
; CHECK-LABEL: define void @diamondMultiInput(
; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
; CHECK-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
; CHECK-NEXT: [[VINS:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0
; CHECK-NEXT: [[VEXT:%.*]] = extractelement <2 x float> [[VECL]], i32 0
; CHECK-NEXT: [[VINS1:%.*]] = insertelement <2 x float> [[VINS]], float [[VEXT]], i32 1
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], [[VINS1]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4
; CHECK-NEXT: ret void
;
  %ptr0 = getelementptr float, ptr %ptr, i32 0
  %ptr1 = getelementptr float, ptr %ptr, i32 1
  %ld0 = load float, ptr %ptr0
  %ld1 = load float, ptr %ptr1

  %ldX = load float, ptr %ptrX

  %sub0 = fsub float %ld0, %ldX
  %sub1 = fsub float %ld1, %ld0
  store float %sub0, ptr %ptr0
  store float %sub1, ptr %ptr1
  ret void
}

; Diamond whose shared values come from constants: %zext0/%zext1 are zext of
; the constant i16 0 and are used both by the first store pair (%ptr[0..1])
; and by the `or i32 0, ...` instructions feeding the second store pair
; (%ptr[10..11]). The expected output is just two <2 x i32> zeroinitializer
; stores.
define void @diamondWithConstantVector(ptr %ptr) {
; CHECK-LABEL: define void @diamondWithConstantVector(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[GEPA0:%.*]] = getelementptr i32, ptr [[PTR]], i64 0
; CHECK-NEXT: [[GEPB0:%.*]] = getelementptr i32, ptr [[PTR]], i64 10
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[GEPA0]], align 4
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[GEPB0]], align 4
; CHECK-NEXT: ret void
;
  %gepA0 = getelementptr i32, ptr %ptr, i64 0
  %gepA1 = getelementptr i32, ptr %ptr, i64 1

  %gepB0 = getelementptr i32, ptr %ptr, i64 10
  %gepB1 = getelementptr i32, ptr %ptr, i64 11

  %zext0 = zext i16 0 to i32
  %zext1 = zext i16 0 to i32

  store i32 %zext0, ptr %gepA0
  store i32 %zext1, ptr %gepA1

  %orB0 = or i32 0, %zext0
  %orB1 = or i32 0, %zext1
  store i32 %orB0, ptr %gepB0
  store i32 %orB1, ptr %gepB1
  ret void
}
