This patch starts initial modeling of VF * UF in VPlan. Initially, introduce a dedicated VFxUF VPValue, which is then populated during VPlan::prepareToExecute. Initially, the VF * UF applies only to the main vector loop region. Once we extend the scope of VPlan in the future, we may want to associate different VFxUFs with different vector loop regions (e.g. the epilogue vector loop) This allows explicitly parameterizing recipes that rely on the VF * UF, like the canonical induction increment. At the moment, this mainly helps to avoid generating some duplicated calls to vscale with scalable vectors. It should also allow using EVL as induction increments explicitly in D99750. Referring to VF * UF is also needed in other places that we plan to migrate to VPlan, like the minimum trip count check during skeleton creation. The first version creates the value for VF * UF directly in prepareToExecute to limit the scope of the patch. A follow-on patch will model VF * UF computation explicitly in VPlan using recipes. Moved from Phabricator (https://reviews.llvm.org/D157322)
1577 lines
98 KiB
LLVM
1577 lines
98 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -mtriple=aarch64-none-linux-gnu -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -mattr=+sve -scalable-vectorization=on -runtime-memory-check-threshold=24 < %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
|
|
|
; Check vectorization on an interleaved load group of factor 2 and an interleaved
|
|
; store group of factor 2.
|
|
|
|
; int AB[1024];
|
|
; int CD[1024];
|
|
; void test_array_load2_store2(int C, int D) {
|
|
; for (int i = 0; i < 1024; i+=2) {
|
|
; int A = AB[i];
|
|
; int B = AB[i+1];
|
|
; CD[i] = A + C;
|
|
; CD[i+1] = B * D;
|
|
; }
|
|
; }
|
|
|
|
|
|
@AB = common global [1024 x i32] zeroinitializer, align 4
|
|
@CD = common global [1024 x i32] zeroinitializer, align 4
|
|
|
|
define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
|
|
; CHECK-LABEL: @test_array_load2_store2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP0]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <vscale x 4 x i32> [[TMP2]], [[BROADCAST_SPLAT2]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP3]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -1
|
|
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]])
|
|
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
|
|
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
|
|
%load1 = load i32, i32* %arrayidx0, align 4
|
|
%or = or disjoint i64 %indvars.iv, 1
|
|
%arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %or
|
|
%load2 = load i32, i32* %arrayidx1, align 4
|
|
%add = add nsw i32 %load1, %C
|
|
%mul = mul nsw i32 %load2, %D
|
|
%arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
|
|
store i32 %add, i32* %arrayidx2, align 4
|
|
%arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %or
|
|
store i32 %mul, i32* %arrayidx3, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
|
%cmp = icmp slt i64 %indvars.iv.next, 1024
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Check vectorization on an interleaved load group of factor 2 with narrower types and an interleaved
|
|
; store group of factor 2.
|
|
|
|
; short AB[1024];
|
|
; int CD[1024];
|
|
; void test_array_load2_store2(int C, int D) {
|
|
; for (int i = 0; i < 1024; i+=2) {
|
|
; short A = AB[i];
|
|
; short B = AB[i+1];
|
|
; CD[i] = A + C;
|
|
; CD[i+1] = B * D;
|
|
; }
|
|
; }
|
|
|
|
|
|
@AB_i16 = common global [1024 x i16] zeroinitializer, align 4
|
|
|
|
define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
|
|
; CHECK-LABEL: @test_array_load2_i16_store2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl <vscale x 4 x i64> [[TMP0]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP4]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i16], ptr @AB_i16, i64 0, <vscale x 4 x i64> [[TMP5]]
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
|
|
; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
|
|
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[BROADCAST_SPLAT]], [[TMP7]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER1]] to <vscale x 4 x i32>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <vscale x 4 x i32> [[BROADCAST_SPLAT3]], [[TMP9]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i64> [[TMP5]], i64 0
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP11]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 -1
|
|
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP10]])
|
|
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP13]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i16], [1024 x i16]* @AB_i16, i64 0, i64 %indvars.iv
|
|
%0 = load i16, i16* %arrayidx, align 2
|
|
%1 = or disjoint i64 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds [1024 x i16], [1024 x i16]* @AB_i16, i64 0, i64 %1
|
|
%2 = load i16, i16* %arrayidx2, align 2
|
|
%conv = sext i16 %0 to i32
|
|
%add3 = add nsw i32 %conv, %C
|
|
%arrayidx5 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
|
|
store i32 %add3, i32* %arrayidx5, align 4
|
|
%conv6 = sext i16 %2 to i32
|
|
%mul = mul nsw i32 %conv6, %D
|
|
%arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %1
|
|
store i32 %mul, i32* %arrayidx9, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
|
%cmp = icmp ult i64 %indvars.iv, 1022
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Check vectorization on an interleaved load group of factor 2 and an interleaved
|
|
; store group of factor 2 with narrower types.
|
|
|
|
; int AB[1024];
|
|
; short CD[1024];
|
|
; void test_array_load2_store2(int C, int D) {
|
|
; for (int i = 0; i < 1024; i+=2) {
|
|
; short A = AB[i];
|
|
; short B = AB[i+1];
|
|
; CD[i] = A + C;
|
|
; CD[i+1] = B * D;
|
|
; }
|
|
; }
|
|
|
|
|
|
@CD_i16 = dso_local local_unnamed_addr global [1024 x i16] zeroinitializer, align 2
|
|
|
|
define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 {
|
|
; CHECK-LABEL: @test_array_load2_store2_i16(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl <vscale x 4 x i64> [[TMP0]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP4]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
|
|
; CHECK-NEXT: [[TMP7:%.*]] = or disjoint <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[TMP5]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = trunc <vscale x 4 x i32> [[TMP8]] to <vscale x 4 x i16>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP9]], <vscale x 4 x ptr> [[TMP10]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <vscale x 4 x i32> [[TMP6]], [[BROADCAST_SPLAT2]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = trunc <vscale x 4 x i32> [[TMP11]] to <vscale x 4 x i16>
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, <vscale x 4 x i64> [[TMP7]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP12]], <vscale x 4 x ptr> [[TMP13]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
|
|
%0 = load i32, i32* %arrayidx, align 4
|
|
%1 = or disjoint i64 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %1
|
|
%2 = load i32, i32* %arrayidx2, align 4
|
|
%add3 = add nsw i32 %0, %C
|
|
%conv = trunc i32 %add3 to i16
|
|
%arrayidx5 = getelementptr inbounds [1024 x i16], [1024 x i16]* @CD_i16, i64 0, i64 %indvars.iv
|
|
store i16 %conv, i16* %arrayidx5, align 2
|
|
%mul = mul nsw i32 %2, %D
|
|
%conv6 = trunc i32 %mul to i16
|
|
%arrayidx9 = getelementptr inbounds [1024 x i16], [1024 x i16]* @CD_i16, i64 0, i64 %1
|
|
store i16 %conv6, i16* %arrayidx9, align 2
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
|
%cmp = icmp ult i64 %indvars.iv, 1022
|
|
br i1 %cmp, label %for.body, label %for.end
|
|
|
|
for.end: ; preds = %for.body
|
|
ret void
|
|
}
|
|
|
|
; Check vectorization on an interleaved load group of factor 6.
|
|
; There is no dedicated ldN/stN so use gather instead
|
|
|
|
%struct.ST6 = type { i32, i32, i32, i32, i32, i32 }
|
|
|
|
define i32 @test_struct_load6(%struct.ST6* %S) #1 {
|
|
; CHECK-LABEL: @test_struct_load6(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP2]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ST6:%.*]], ptr [[S:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP3]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 1
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP4]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 2
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP5]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 3
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP6]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 4
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP7]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ST6]], ptr [[S]], <vscale x 4 x i64> [[VEC_IND]], i32 5
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP8]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i32> [[TMP9]], [[WIDE_MASKED_GATHER2]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER1]], [[WIDE_MASKED_GATHER3]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[TMP11]], [[WIDE_MASKED_GATHER4]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[TMP12]], [[WIDE_MASKED_GATHER5]]
|
|
; CHECK-NEXT: [[TMP14]] = sub <vscale x 4 x i32> [[TMP10]], [[TMP13]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]])
|
|
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: [[SUB14_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: ret i32 [[SUB14_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%r.041 = phi i32 [ 0, %entry ], [ %sub14, %for.body ]
|
|
%x = getelementptr inbounds %struct.ST6, %struct.ST6* %S, i64 %indvars.iv, i32 0
|
|
%0 = load i32, i32* %x, align 4
|
|
%y = getelementptr inbounds %struct.ST6, %struct.ST6* %S, i64 %indvars.iv, i32 1
|
|
%1 = load i32, i32* %y, align 4
|
|
%z = getelementptr inbounds %struct.ST6, %struct.ST6* %S, i64 %indvars.iv, i32 2
|
|
%2 = load i32, i32* %z, align 4
|
|
%w = getelementptr inbounds %struct.ST6, %struct.ST6* %S, i64 %indvars.iv, i32 3
|
|
%3 = load i32, i32* %w, align 4
|
|
%a = getelementptr inbounds %struct.ST6, %struct.ST6* %S, i64 %indvars.iv, i32 4
|
|
%4 = load i32, i32* %a, align 4
|
|
%b = getelementptr inbounds %struct.ST6, %struct.ST6* %S, i64 %indvars.iv, i32 5
|
|
%5 = load i32, i32* %b, align 4
|
|
%.neg36 = add i32 %0, %r.041
|
|
%6 = add i32 %.neg36, %2
|
|
%7 = add i32 %1, %3
|
|
%8 = add i32 %7, %4
|
|
%9 = add i32 %8, %5
|
|
%sub14 = sub i32 %6, %9
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
%sub14.lcssa = phi i32 [ %sub14, %for.body ]
|
|
ret i32 %sub14.lcssa
|
|
}
|
|
|
|
|
|
; Check vectorization on a reverse interleaved load group of factor 2 and
|
|
; a reverse interleaved store group of factor 2.
|
|
|
|
; struct ST2 {
|
|
; int x;
|
|
; int y;
|
|
; };
|
|
;
|
|
; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
|
|
; for (int i = 1023; i >= 0; i--) {
|
|
; int a = A[i].x + i; // interleaved load of index 0
|
|
; int b = A[i].y - i; // interleaved load of index 1
|
|
; B[i].x = a; // interleaved store of index 0
|
|
; B[i].y = b; // interleaved store of index 1
|
|
; }
|
|
; }
|
|
|
|
|
|
%struct.ST2 = type { i32, i32 }
|
|
|
|
define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) #1 {
|
|
; CHECK-LABEL: @test_reversed_load2_store2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1023, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[TMP0]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i32 [[TMP1]], -4
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[DOTNEG]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i32 [[TMP3]], 3
|
|
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 2, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP7]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP8]])
|
|
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
|
|
; CHECK-NEXT: [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP9]])
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE1]], [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 1
|
|
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 3
|
|
; CHECK-NEXT: [[TMP15:%.*]] = sub nsw i32 1, [[TMP14]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP16]]
|
|
; CHECK-NEXT: [[REVERSE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP10]])
|
|
; CHECK-NEXT: [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP11]])
|
|
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[REVERSE2]], <vscale x 4 x i32> [[REVERSE3]])
|
|
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP17]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
|
|
%load1 = load i32, i32* %x, align 4
|
|
%trunc = trunc i64 %indvars.iv to i32
|
|
%add = add nsw i32 %load1, %trunc
|
|
%y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
|
|
%load2 = load i32, i32* %y, align 4
|
|
%sub = sub nsw i32 %load2, %trunc
|
|
%x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
|
|
store i32 %add, i32* %x5, align 4
|
|
%y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
|
|
store i32 %sub, i32* %y8, align 4
|
|
%indvars.iv.next = add nsw i64 %indvars.iv, -1
|
|
%cmp = icmp sgt i64 %indvars.iv, 0
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
}
|
|
|
|
; Check vectorization on an interleaved load group of factor 2 with 1 gap
|
|
; (missing the load of odd elements). Because the vectorized loop would
|
|
; speculatively access memory out-of-bounds, we must execute at least one
|
|
; iteration of the scalar loop.
|
|
|
|
; void even_load_static_tc(int *A, int *B) {
|
|
; for (unsigned i = 0; i < 1024; i+=2)
|
|
; B[i/2] = A[i] * 2;
|
|
; }
|
|
|
|
|
|
define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) #1 {
|
|
; CHECK-LABEL: @even_load_static_tc(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 512, [[TMP1]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP2]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nsw <vscale x 4 x i32> [[TMP3]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[INDEX]], 9223372036854775804
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP5]]
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP4]], ptr [[TMP6]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[LOAD]], 1
|
|
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[LSHR]]
|
|
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV]], 1022
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%load = load i32, i32* %arrayidx, align 4
|
|
%mul = shl nsw i32 %load, 1
|
|
%lshr = lshr exact i64 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %lshr
|
|
store i32 %mul, i32* %arrayidx2, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
|
%cmp = icmp ult i64 %indvars.iv.next, 1024
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
}
|
|
|
|
; Check vectorization on an interleaved load group of factor 2 with 1 gap
|
|
; (missing the load of odd elements). Because the vectorized loop would
|
|
; speculatively access memory out-of-bounds, we must execute at least one
|
|
; iteration of the scalar loop.
|
|
|
|
; void even_load_dynamic_tc(int *A, int *B, unsigned N) {
|
|
; for (unsigned i = 0; i < N; i+=2)
|
|
; B[i/2] = A[i] * 2;
|
|
; }
|
|
|
|
|
|
define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) #1 {
|
|
; CHECK-LABEL: @even_load_dynamic_tc(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 2)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_NOT_NOT:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_NOT_NOT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nuw i64 [[TMP1]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP6]], -1
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP4]], [[TMP7]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP9]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
|
|
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP10]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[TMP12:%.*]] = shl nsw <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[INDEX]], 9223372036854775804
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]]
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[LOAD]], 1
|
|
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[LSHR]]
|
|
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%load = load i32, i32* %arrayidx, align 4
|
|
%mul = shl nsw i32 %load, 1
|
|
%lshr = lshr exact i64 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %lshr
|
|
store i32 %mul, i32* %arrayidx2, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
|
%cmp = icmp ult i64 %indvars.iv.next, %N
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
}
|
|
|
|
; Check vectorization on a reverse interleaved load group of factor 2 with 1
|
|
; gap and a reverse interleaved store group of factor 2. The interleaved load
|
|
; group should be removed since it has a gap and is reverse.
|
|
|
|
; struct pair {
|
|
; int x;
|
|
; int y;
|
|
; };
|
|
;
|
|
; void load_gap_reverse(struct pair *P1, struct pair *P2, int X) {
|
|
; for (int i = 1023; i >= 0; i--) {
|
|
; int a = X + i;
|
|
; int b = A[i].y - i;
|
|
; B[i].x = a;
|
|
; B[i].y = b;
|
|
; }
|
|
; }
|
|
|
|
%pair = type { i64, i64 }
|
|
define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) #1 {
|
|
; CHECK-LABEL: @load_gap_reverse(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = sub <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), [[TMP0]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP1]], -4
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[DOTNEG]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[X:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 1
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> [[TMP4]], i32 8, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> poison)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <vscale x 4 x i64> [[WIDE_MASKED_GATHER]], [[VEC_IND]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP2]], <vscale x 4 x ptr> [[TMP3]], i32 8, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> [[TMP5]], <vscale x 4 x ptr> [[TMP4]], i32 8, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ]
|
|
%0 = add nsw i64 %X, %i
|
|
%1 = getelementptr inbounds %pair, %pair* %P1, i64 %i, i32 0
|
|
%2 = getelementptr inbounds %pair, %pair* %P2, i64 %i, i32 1
|
|
%3 = load i64, i64* %2, align 8
|
|
%4 = sub nsw i64 %3, %i
|
|
store i64 %0, i64* %1, align 8
|
|
store i64 %4, i64* %2, align 8
|
|
%i.next = add nsw i64 %i, -1
|
|
%cond = icmp sgt i64 %i, 0
|
|
br i1 %cond, label %for.body, label %for.exit
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
; Check vectorization on interleaved access groups identified from mixed
|
|
; loads/stores.
|
|
; void mixed_load2_store2(int *A, int *B) {
|
|
; for (unsigned i = 0; i < 1024; i+=2) {
|
|
; B[i] = A[i] * A[i+1];
|
|
; B[i+1] = A[i] + A[i+1];
|
|
; }
|
|
; }
|
|
|
|
|
|
define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) #1 {
|
|
; CHECK-LABEL: @mixed_load2_store2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP0]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <vscale x 4 x i32> [[TMP2]], [[TMP1]]
|
|
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 1
|
|
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <vscale x 4 x i32> [[TMP6]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -1
|
|
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP7]])
|
|
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
|
|
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP19:![0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
|
%load1 = load i32, i32* %arrayidx, align 4
|
|
%or = or disjoint i64 %indvars.iv, 1
|
|
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %or
|
|
%load2 = load i32, i32* %arrayidx2, align 4
|
|
%mul = mul nsw i32 %load2, %load1
|
|
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
|
store i32 %mul, i32* %arrayidx4, align 4
|
|
%load3 = load i32, i32* %arrayidx, align 4
|
|
%load4 = load i32, i32* %arrayidx2, align 4
|
|
%add10 = add nsw i32 %load4, %load3
|
|
%arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %or
|
|
store i32 %add10, i32* %arrayidx13, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
|
|
%cmp = icmp ult i64 %indvars.iv.next, 1024
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
}
|
|
|
|
|
|
; Check vectorization on interleaved access groups with members having different
|
|
; kinds of type.
|
|
|
|
; struct IntFloat {
|
|
; int a;
|
|
; float b;
|
|
; };
|
|
;
|
|
; int SA;
|
|
; float SB;
|
|
;
|
|
; void int_float_struct(struct IntFloat *A) {
|
|
; int SumA;
|
|
; float SumB;
|
|
; for (unsigned i = 0; i < 1024; i++) {
|
|
; SumA += A[i].a;
|
|
; SumB += A[i].b;
|
|
; }
|
|
; SA = SumA;
|
|
; SB = SumB;
|
|
; }
|
|
|
|
|
|
%struct.IntFloat = type { i32, float }
|
|
|
|
@SA = common global i32 0, align 4
|
|
@SB = common global float 0.000000e+00, align 4
|
|
|
|
define void @int_float_struct(%struct.IntFloat* nocapture readonly %p) #0 {
|
|
; CHECK-LABEL: @int_float_struct(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> zeroinitializer, float undef, i32 0), [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 undef, i32 0), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_INTFLOAT:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP0]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 4 x i32> [[TMP2]] to <vscale x 4 x float>
|
|
; CHECK-NEXT: [[TMP4]] = add <vscale x 4 x i32> [[TMP1]], [[VEC_PHI1]]
|
|
; CHECK-NEXT: [[TMP5]] = fadd fast <vscale x 4 x float> [[VEC_PHI]], [[TMP3]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP4]])
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP5]])
|
|
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr @SA, align 4
|
|
; CHECK-NEXT: store float [[ADD3_LCSSA]], ptr @SB, align 4
|
|
; CHECK-NEXT: ret void
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
store i32 %add, i32* @SA, align 4
|
|
store float %add3, float* @SB, align 4
|
|
ret void
|
|
|
|
for.body: ; preds = %for.body, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
|
|
%SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
|
|
%a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %p, i64 %indvars.iv, i32 0
|
|
%load1 = load i32, i32* %a, align 4
|
|
%add = add nsw i32 %load1, %SumA.013
|
|
%b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %p, i64 %indvars.iv, i32 1
|
|
%load2 = load float, float* %b, align 4
|
|
%add3 = fadd fast float %SumB.014, %load2
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
}
|
|
|
|
; Check vectorization of interleaved access groups in the presence of
|
|
; dependences (PR27626). The following tests check that we don't reorder
|
|
; dependent loads and stores when generating code for interleaved access
|
|
; groups. Stores should be scalarized because the required code motion would
|
|
; break dependences, and the remaining interleaved load groups should have
|
|
; gaps.
|
|
|
|
; PR27626_0: Ensure a strided store is not moved after a dependent (zero
|
|
; distance) strided load.
|
|
|
|
; void PR27626_0(struct pair *p, int z, int n) {
|
|
; for (int i = 0; i < n; i++) {
|
|
; p[i].x = z;
|
|
; p[i].y = p[i].x;
|
|
; }
|
|
; }
|
|
|
|
%pair.i32 = type { i32, i32 }
|
|
define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) #1 {
|
|
; CHECK-LABEL: @PR27626_0(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_NOT:%.*]] = icmp ugt i64 [[SMAX]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_NOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -1
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[VEC_IND]], i32 1
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x ptr> [[TMP10]], i64 0
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
|
|
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
|
|
; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_X]], align 4
|
|
; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_Y]], align 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
|
|
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP23:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
|
%p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
|
|
%p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
|
|
store i32 %z, i32* %p_i.x, align 4
|
|
%0 = load i32, i32* %p_i.x, align 4
|
|
store i32 %0, i32 *%p_i.y, align 4
|
|
%i.next = add nuw nsw i64 %i, 1
|
|
%cond = icmp slt i64 %i.next, %n
|
|
br i1 %cond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; PR27626_1: Ensure a strided load is not moved before a dependent (zero
|
|
; distance) strided store.
|
|
|
|
; void PR27626_1(struct pair *p, int n) {
|
|
; int s = 0;
|
|
; for (int i = 0; i < n; i++) {
|
|
; p[i].y = p[i].x;
|
|
; s += p[i].y
|
|
; }
|
|
; }
|
|
|
|
define i32 @PR27626_1(%pair.i32 *%p, i64 %n) #1 {
|
|
; CHECK-LABEL: @PR27626_1(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_NOT:%.*]] = icmp ugt i64 [[SMAX]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_NOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -1
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[VEC_IND]], i32 1
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP10]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x ptr> [[TMP11]], i64 0
|
|
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x i32>, ptr [[TMP13]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]])
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
|
|
; CHECK-NEXT: [[TMP15]] = add <vscale x 4 x i32> [[TMP14]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP15]])
|
|
; CHECK-NEXT: br label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP21:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
|
|
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
|
|
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_I_X]], align 4
|
|
; CHECK-NEXT: store i32 [[TMP20]], ptr [[P_I_Y]], align 4
|
|
; CHECK-NEXT: [[TMP21]] = add nsw i32 [[TMP20]], [[S]]
|
|
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
|
|
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP25:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret i32 [[TMP21]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
|
%s = phi i32 [ %2, %for.body ], [ 0, %entry ]
|
|
%p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
|
|
%p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
|
|
%0 = load i32, i32* %p_i.x, align 4
|
|
store i32 %0, i32* %p_i.y, align 4
|
|
%1 = load i32, i32* %p_i.y, align 4
|
|
%2 = add nsw i32 %1, %s
|
|
%i.next = add nuw nsw i64 %i, 1
|
|
%cond = icmp slt i64 %i.next, %n
|
|
br i1 %cond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
%3 = phi i32 [ %2, %for.body ]
|
|
ret i32 %3
|
|
}
|
|
|
|
; PR27626_2: Ensure a strided store is not moved after a dependent (negative
|
|
; distance) strided load.
|
|
|
|
; void PR27626_2(struct pair *p, int z, int n) {
|
|
; for (int i = 0; i < n; i++) {
|
|
; p[i].x = z;
|
|
; p[i].y = p[i - 1].x;
|
|
; }
|
|
; }
|
|
|
|
define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) #1 {
|
|
; CHECK-LABEL: @PR27626_2(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_NOT:%.*]] = icmp ugt i64 [[SMAX]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_NOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -1
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]], i32 0
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 -1, i32 0
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[VEC_IND]], i32 1
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP11]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
|
|
; CHECK-NEXT: [[P_I_MINUS_1_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 -1, i32 0
|
|
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
|
|
; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_X]], align 4
|
|
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[P_I_MINUS_1_X]], align 4
|
|
; CHECK-NEXT: store i32 [[TMP17]], ptr [[P_I_Y]], align 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
|
|
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
|
%i_minus_1 = add nuw nsw i64 %i, -1
|
|
%p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
|
|
%p_i_minus_1.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_minus_1, i32 0
|
|
%p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
|
|
store i32 %z, i32* %p_i.x, align 4
|
|
%0 = load i32, i32* %p_i_minus_1.x, align 4
|
|
store i32 %0, i32 *%p_i.y, align 4
|
|
%i.next = add nuw nsw i64 %i, 1
|
|
%cond = icmp slt i64 %i.next, %n
|
|
br i1 %cond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; PR27626_3: Ensure a strided load is not moved before a dependent (negative
|
|
; distance) strided store.
|
|
|
|
; void PR27626_3(struct pair *p, int z, int n) {
|
|
; for (int i = 0; i < n; i++) {
|
|
; p[i + 1].y = p[i].x;
|
|
; s += p[i].y;
|
|
; }
|
|
; }
|
|
|
|
define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) #1 {
|
|
; CHECK-LABEL: @PR27626_3(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_NOT:%.*]] = icmp ugt i64 [[SMAX]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_NOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -1
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
|
|
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[TMP17]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], <vscale x 4 x i64> [[TMP10]], i32 1
|
|
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP11]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP14]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x i32>, ptr [[TMP12]], align 4
|
|
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC1]])
|
|
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC2]], 0
|
|
; CHECK-NEXT: [[TMP16]] = add <vscale x 4 x i32> [[TMP15]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]])
|
|
; CHECK-NEXT: br label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP23:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[I_PLUS_1:%.*]] = add nuw nsw i64 [[I]], 1
|
|
; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
|
|
; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
|
|
; CHECK-NEXT: [[P_I_PLUS_1_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I_PLUS_1]], i32 1
|
|
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_I_X]], align 4
|
|
; CHECK-NEXT: store i32 [[TMP21]], ptr [[P_I_PLUS_1_Y]], align 4
|
|
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_I_Y]], align 4
|
|
; CHECK-NEXT: [[TMP23]] = add nsw i32 [[TMP22]], [[S]]
|
|
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
|
|
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret i32 [[TMP23]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
|
%s = phi i32 [ %2, %for.body ], [ 0, %entry ]
|
|
%i_plus_1 = add nuw nsw i64 %i, 1
|
|
%p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
|
|
%p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
|
|
%p_i_plus_1.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_plus_1, i32 1
|
|
%0 = load i32, i32* %p_i.x, align 4
|
|
store i32 %0, i32* %p_i_plus_1.y, align 4
|
|
%1 = load i32, i32* %p_i.y, align 4
|
|
%2 = add nsw i32 %1, %s
|
|
%i.next = add nuw nsw i64 %i, 1
|
|
%cond = icmp slt i64 %i.next, %n
|
|
br i1 %cond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
%3 = phi i32 [ %2, %for.body ]
|
|
ret i32 %3
|
|
}
|
|
|
|
; PR27626_4: Ensure we form an interleaved group for strided stores in the
|
|
; presence of a write-after-write dependence. We create a group for
|
|
; (2) and (3) while excluding (1).
|
|
|
|
; void PR27626_4(int *a, int x, int y, int z, int n) {
|
|
; for (int i = 0; i < n; i += 2) {
|
|
; a[i] = x; // (1)
|
|
; a[i] = y; // (2)
|
|
; a[i + 1] = z; // (3)
|
|
; }
|
|
; }
|
|
|
|
define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
|
|
; CHECK-LABEL: @PR27626_4(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
|
|
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP7:%.*]] = shl <vscale x 4 x i64> [[TMP6]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 3
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP10:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 -1
|
|
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> [[BROADCAST_SPLAT4]])
|
|
; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP13]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[I_PLUS_1:%.*]] = or disjoint i64 [[I]], 1
|
|
; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
|
|
; CHECK-NEXT: [[A_I_PLUS_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_PLUS_1]]
|
|
; CHECK-NEXT: store i32 [[Y]], ptr [[A_I]], align 4
|
|
; CHECK-NEXT: store i32 [[Z]], ptr [[A_I_PLUS_1]], align 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
|
|
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP31:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
|
|
%i_plus_1 = add i64 %i, 1
|
|
%a_i = getelementptr inbounds i32, i32* %a, i64 %i
|
|
%a_i_plus_1 = getelementptr inbounds i32, i32* %a, i64 %i_plus_1
|
|
store i32 %x, i32* %a_i, align 4
|
|
store i32 %y, i32* %a_i, align 4
|
|
store i32 %z, i32* %a_i_plus_1, align 4
|
|
%i.next = add nuw nsw i64 %i, 2
|
|
%cond = icmp slt i64 %i.next, %n
|
|
br i1 %cond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; PR27626_5: Ensure we do not form an interleaved group for strided stores in
|
|
; the presence of a write-after-write dependence.
|
|
|
|
; void PR27626_5(int *a, int x, int y, int z, int n) {
|
|
; for (int i = 3; i < n; i += 2) {
|
|
; a[i - 1] = x;
|
|
; a[i - 3] = y;
|
|
; a[i] = z;
|
|
; }
|
|
; }
|
|
|
|
define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
|
|
; CHECK-LABEL: @PR27626_5(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[N_VEC]], 1
|
|
; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP6]], 3
|
|
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 4 x i64> [[TMP7]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 3
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], <vscale x 4 x i64> [[TMP11]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], <vscale x 4 x i64> [[TMP12]]
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x ptr> [[TMP13]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
|
|
; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i32, ptr [[TMP19]], i64 -1
|
|
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
|
|
; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i32, ptr [[TMP20]], i64 -3
|
|
; CHECK-NEXT: store i32 [[X]], ptr [[A_I_MINUS_1]], align 4
|
|
; CHECK-NEXT: store i32 [[Y]], ptr [[A_I_MINUS_3]], align 4
|
|
; CHECK-NEXT: store i32 [[Z]], ptr [[A_I]], align 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
|
|
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP33:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i = phi i64 [ %i.next, %for.body ], [ 3, %entry ]
|
|
%i_minus_1 = sub i64 %i, 1
|
|
%i_minus_3 = sub i64 %i_minus_1, 2
|
|
%a_i = getelementptr inbounds i32, i32* %a, i64 %i
|
|
%a_i_minus_1 = getelementptr inbounds i32, i32* %a, i64 %i_minus_1
|
|
%a_i_minus_3 = getelementptr inbounds i32, i32* %a, i64 %i_minus_3
|
|
store i32 %x, i32* %a_i_minus_1, align 4
|
|
store i32 %y, i32* %a_i_minus_3, align 4
|
|
store i32 %z, i32* %a_i, align 4
|
|
%i.next = add nuw nsw i64 %i, 2
|
|
%cond = icmp slt i64 %i.next, %n
|
|
br i1 %cond, label %for.body, label %for.end
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; PR34743: Ensure that a cast which needs to sink after a load that belongs to
|
|
; an interleaved group, indeeded gets sunk.
|
|
|
|
; void PR34743(short *a, int *b, int n) {
|
|
; for (int i = 0, iv = 0; iv < n; i++, iv += 2) {
|
|
; b[i] = a[iv] * a[iv+1] * a[iv+2];
|
|
; }
|
|
; }
|
|
|
|
define void @PR34743(i16* %a, i32* %b, i64 %n) #1 {
|
|
; CHECK-LABEL: @PR34743(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
|
|
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N:%.*]], 1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
|
; CHECK: vector.memcheck:
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP6]], i64 4
|
|
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
|
|
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[TMP7]], i64 6
|
|
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]]
|
|
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP8]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
|
|
; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 2
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[TMP9]], 2
|
|
; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
|
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP11]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; CHECK-NEXT: [[TMP13:%.*]] = shl <vscale x 4 x i64> [[TMP12]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 3
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP15]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_MASKED_GATHER4:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP13]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP16]]
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP18]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison), !alias.scope [[META34:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
|
|
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP17]]
|
|
; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison), !alias.scope [[META34]]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 -1)
|
|
; CHECK-NEXT: [[TMP22:%.*]] = sext <vscale x 4 x i16> [[TMP21]] to <vscale x 4 x i32>
|
|
; CHECK-NEXT: [[TMP23:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]] to <vscale x 4 x i32>
|
|
; CHECK-NEXT: [[TMP24:%.*]] = mul nsw <vscale x 4 x i32> [[TMP22]], [[TMP19]]
|
|
; CHECK-NEXT: [[TMP25:%.*]] = mul nsw <vscale x 4 x i32> [[TMP24]], [[TMP23]]
|
|
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP25]], ptr [[TMP26]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
|
|
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2
|
|
; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1
|
|
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 [[TMP32]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV2:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[I1:%.*]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
|
|
; CHECK-NEXT: [[I1]] = add nuw nsw i64 [[I]], 1
|
|
; CHECK-NEXT: [[IV1:%.*]] = or disjoint i64 [[IV]], 1
|
|
; CHECK-NEXT: [[IV2]] = add nuw nsw i64 [[IV]], 2
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV1]]
|
|
; CHECK-NEXT: [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 4
|
|
; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[LOAD1]] to i32
|
|
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV2]]
|
|
; CHECK-NEXT: [[LOAD2]] = load i16, ptr [[GEP2]], align 4
|
|
; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[LOAD2]] to i32
|
|
; CHECK-NEXT: [[MUL01:%.*]] = mul nsw i32 [[CONV]], [[CONV1]]
|
|
; CHECK-NEXT: [[MUL012:%.*]] = mul nsw i32 [[MUL01]], [[CONV2]]
|
|
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
|
|
; CHECK-NEXT: store i32 [[MUL012]], ptr [[ARRAYIDX5]], align 4
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP40:![0-9]+]]
|
|
; CHECK: end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%.pre = load i16, i16* %a
|
|
br label %loop
|
|
|
|
loop:
|
|
%0 = phi i16 [ %.pre, %entry ], [ %load2, %loop ]
|
|
%iv = phi i64 [ 0, %entry ], [ %iv2, %loop ]
|
|
%i = phi i64 [ 0, %entry ], [ %i1, %loop ]
|
|
%conv = sext i16 %0 to i32
|
|
%i1 = add nuw nsw i64 %i, 1
|
|
%iv1 = add nuw nsw i64 %iv, 1
|
|
%iv2 = add nuw nsw i64 %iv, 2
|
|
%gep1 = getelementptr inbounds i16, i16* %a, i64 %iv1
|
|
%load1 = load i16, i16* %gep1, align 4
|
|
%conv1 = sext i16 %load1 to i32
|
|
%gep2 = getelementptr inbounds i16, i16* %a, i64 %iv2
|
|
%load2 = load i16, i16* %gep2, align 4
|
|
%conv2 = sext i16 %load2 to i32
|
|
%mul01 = mul nsw i32 %conv, %conv1
|
|
%mul012 = mul nsw i32 %mul01, %conv2
|
|
%arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %i
|
|
store i32 %mul012, i32* %arrayidx5
|
|
%exitcond = icmp eq i64 %iv, %n
|
|
br i1 %exitcond, label %end, label %loop
|
|
|
|
end:
|
|
ret void
|
|
}
|
|
|
|
attributes #1 = { "target-features"="+sve" vscale_range(1, 16) }
|
|
attributes #0 = { "unsafe-fp-math"="true" "target-features"="+sve" vscale_range(1, 16) }
|