At the moment, some VPInstructions create only a single scalar value, but use VPTransformatState's 'vector' storage for this value. Those values are effectively uniform-per-VF (or in some cases uniform-across-VF-and-UF). Using the vector/per-part storage doesn't interact well with other recipes, that more accurately using (Part, Lane) to look up scalar values and prevents VPInstructions creating scalars from interacting with other recipes working with scalars. This PR tries to unify handling of scalars by using (Part, 0) for scalar values where only the first lane is demanded. This allows using VPInstructions with other recipes like VPScalarCastRecipe and is also needed when using VPInstructions in more cases otuside the vector loop region to generate scalars. Depends on https://github.com/llvm/llvm-project/pull/80269
278 lines
21 KiB
LLVM
278 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=none < %s | FileCheck %s --check-prefix=NONE
|
|
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data < %s | FileCheck %s --check-prefix=DATA
|
|
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-without-lane-mask < %s | FileCheck %s --check-prefix=DATA_NO_LANEMASK
|
|
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-and-control < %s | FileCheck %s --check-prefix=DATA_AND_CONTROL
|
|
; RUN: opt -S -passes=loop-vectorize -force-tail-folding-style=data-and-control-without-rt-check < %s | FileCheck %s --check-prefix=DATA_AND_CONTROL_NO_RT_CHECK
|
|
|
|
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
; Test the different tail folding styles.
|
|
|
|
define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features" = "+sve" {
|
|
; NONE-LABEL: @simple_memset_tailfold(
|
|
; NONE-NEXT: entry:
|
|
; NONE-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
|
; NONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; NONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
|
; NONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP1]]
|
|
; NONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; NONE: vector.ph:
|
|
; NONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; NONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
|
; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]]
|
|
; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
|
|
; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
|
; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
|
|
; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
|
|
; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; NONE-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; NONE: vector.body:
|
|
; NONE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
|
; NONE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 0
|
|
; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
|
|
; NONE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
|
|
; NONE-NEXT: store <vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4
|
|
; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]]
|
|
; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
|
; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; NONE: middle.block:
|
|
; NONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]]
|
|
; NONE-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
; NONE: scalar.ph:
|
|
; NONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; NONE-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; NONE: while.body:
|
|
; NONE-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; NONE-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
|
|
; NONE-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
|
|
; NONE-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
|
|
; NONE-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
|
|
; NONE-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; NONE: while.end.loopexit:
|
|
; NONE-NEXT: ret void
|
|
;
|
|
; DATA-LABEL: @simple_memset_tailfold(
|
|
; DATA-NEXT: entry:
|
|
; DATA-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
|
; DATA-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
|
|
; DATA-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
|
|
; DATA-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
|
|
; DATA-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; DATA: vector.ph:
|
|
; DATA-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
|
; DATA-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
|
; DATA-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
|
; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
|
; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
|
; DATA-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
|
; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
|
|
; DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
|
|
; DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; DATA-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; DATA: vector.body:
|
|
; DATA-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
|
; DATA-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
|
; DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP9]], i64 [[UMAX]])
|
|
; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
|
|
; DATA-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
|
|
; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
|
; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]]
|
|
; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
|
|
; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; DATA: middle.block:
|
|
; DATA-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
; DATA: scalar.ph:
|
|
; DATA-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; DATA-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; DATA: while.body:
|
|
; DATA-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; DATA-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
|
|
; DATA-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
|
|
; DATA-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
|
|
; DATA-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
|
|
; DATA-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; DATA: while.end.loopexit:
|
|
; DATA-NEXT: ret void
|
|
;
|
|
; DATA_NO_LANEMASK-LABEL: @simple_memset_tailfold(
|
|
; DATA_NO_LANEMASK-NEXT: entry:
|
|
; DATA_NO_LANEMASK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
|
|
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; DATA_NO_LANEMASK: vector.ph:
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
|
; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
|
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
|
; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
|
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
|
|
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
|
|
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
|
|
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; DATA_NO_LANEMASK: vector.body:
|
|
; DATA_NO_LANEMASK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ]
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
|
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[INDEX1]], i64 0
|
|
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP11:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
|
|
; DATA_NO_LANEMASK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT3]], [[TMP11]]
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
|
|
; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[TMP12]])
|
|
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]]
|
|
; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
|
|
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; DATA_NO_LANEMASK: middle.block:
|
|
; DATA_NO_LANEMASK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
; DATA_NO_LANEMASK: scalar.ph:
|
|
; DATA_NO_LANEMASK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; DATA_NO_LANEMASK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; DATA_NO_LANEMASK: while.body:
|
|
; DATA_NO_LANEMASK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; DATA_NO_LANEMASK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
|
|
; DATA_NO_LANEMASK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
|
|
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
|
|
; DATA_NO_LANEMASK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
|
|
; DATA_NO_LANEMASK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; DATA_NO_LANEMASK: while.end.loopexit:
|
|
; DATA_NO_LANEMASK-NEXT: ret void
|
|
;
|
|
; DATA_AND_CONTROL-LABEL: @simple_memset_tailfold(
|
|
; DATA_AND_CONTROL-NEXT: entry:
|
|
; DATA_AND_CONTROL-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
|
; DATA_AND_CONTROL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
|
|
; DATA_AND_CONTROL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
|
|
; DATA_AND_CONTROL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
|
|
; DATA_AND_CONTROL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; DATA_AND_CONTROL: vector.ph:
|
|
; DATA_AND_CONTROL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
|
|
; DATA_AND_CONTROL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
|
|
; DATA_AND_CONTROL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
|
|
; DATA_AND_CONTROL-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
|
|
; DATA_AND_CONTROL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
|
|
; DATA_AND_CONTROL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
|
; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4
|
|
; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
|
|
; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
|
|
; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; DATA_AND_CONTROL-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; DATA_AND_CONTROL: vector.body:
|
|
; DATA_AND_CONTROL-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
|
; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; DATA_AND_CONTROL-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 0
|
|
; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]]
|
|
; DATA_AND_CONTROL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
|
|
; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
|
; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]]
|
|
; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]])
|
|
; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; DATA_AND_CONTROL-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[TMP14]], i32 0
|
|
; DATA_AND_CONTROL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; DATA_AND_CONTROL: middle.block:
|
|
; DATA_AND_CONTROL-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
; DATA_AND_CONTROL: scalar.ph:
|
|
; DATA_AND_CONTROL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; DATA_AND_CONTROL-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; DATA_AND_CONTROL: while.body:
|
|
; DATA_AND_CONTROL-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; DATA_AND_CONTROL-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
|
|
; DATA_AND_CONTROL-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
|
|
; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
|
|
; DATA_AND_CONTROL-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
|
|
; DATA_AND_CONTROL-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; DATA_AND_CONTROL: while.end.loopexit:
|
|
; DATA_AND_CONTROL-NEXT: ret void
|
|
;
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-LABEL: @simple_memset_tailfold(
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: entry:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK: vector.ph:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[UMAX]], [[TMP6]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]])
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK: vector.body:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[TMP15]], i32 0
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK: middle.block:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK: scalar.ph:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_BODY:%.*]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK: while.body:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; DATA_AND_CONTROL_NO_RT_CHECK: while.end.loopexit:
|
|
; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %while.body
|
|
|
|
while.body: ; preds = %while.body, %entry
|
|
%index = phi i64 [ %index.next, %while.body ], [ 0, %entry ]
|
|
%gep = getelementptr i32, ptr %ptr, i64 %index
|
|
store i32 %val, ptr %gep
|
|
%index.next = add nsw i64 %index, 1
|
|
%cmp10 = icmp ult i64 %index.next, %n
|
|
br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0
|
|
|
|
while.end.loopexit: ; preds = %while.body
|
|
ret void
|
|
}
|
|
|
|
!0 = distinct !{!0, !1, !2, !3, !4}
|
|
!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
|
|
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
|
!3 = !{!"llvm.loop.interleave.count", i32 1}
|
|
!4 = !{!"llvm.loop.vectorize.width", i32 4}
|