On known hardware, reduction, gather, and scatter operations have execution latencies which correlate with the vector length (VL) of the operation. Most other operations (e.g. simple arithmetic) don't correlate in this way, and instead have an essentially fixed cost as VL varies.

When I implemented the initial scalable cost model support for reduction, gather, and scatter operations, I used an upper bound on the statically unknown VL. The argument at the time was that this prevented falsely low costs and biased the vectorizer away from generating code that would be bad on some hardware. Unfortunately, practical experience shows we were a bit too effective at that goal: the high costs de facto prevent vectorization using these constructs at all.

This patch reverses course, and ties the returned cost not to the maximum possible VL, but to the VL which would correspond to VScaleForTuning. This parameter is the same one the vectorizer uses when normalizing loop costs, so the term effectively cancels out. The result is that the vectorizer now sees these constructs as comparable in cost to their fixed-length variants.

This does introduce the possibility of the cost for these operations being a significant underestimate on platforms where the actual VLEN is far from the one implied by VScaleForTuning. On such platforms, we might make poor heuristic choices. Probably not in LV itself (due to the cancellation mentioned above), but possibly during e.g. lowering. I'm not currently aware of any concrete examples of this, but this patch does open a concern which did not previously exist. Previously, overestimated costs caused the analogous problem on machines much closer to the default vscale-for-tuning value; with this patch, we can still hit it if vscale-for-tuning is manually set high and the code is then run on a narrow-VLEN machine.

Differential Revision: https://reviews.llvm.org/D131519
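
To make the costing tradeoff concrete, here is a minimal standalone sketch of the before/after arithmetic. This is an illustration only, not the actual RISCVTargetTransformInfo code: the names (scalableOpCost, MinNumElts, CostPerElement) are invented, and the specific vscale values are example assumptions chosen to match the reasoning above.

  // Hypothetical sketch of VL-dependent op costing; not the real cost model.
  #include <cstdint>
  #include <cstdio>

  // Model: a VL-dependent op (reduction/gather/scatter) on a scalable type
  // costs one unit per element actually processed.
  static uint64_t scalableOpCost(uint64_t MinNumElts, uint64_t VScale,
                                 uint64_t CostPerElement) {
    return MinNumElts * VScale * CostPerElement;
  }

  int main() {
    const uint64_t MinNumElts = 1; // e.g. <vscale x 1 x i64>

    // Old behavior: bound the unknown VL by the largest vscale the
    // architecture allows (RVV permits VLEN up to 65536, i.e. vscale up to
    // 1024 with 64-bit blocks), which makes these ops look prohibitive.
    uint64_t UpperBoundCost = scalableOpCost(MinNumElts, /*VScale=*/1024, 1);

    // New behavior: use the vscale implied by VScaleForTuning (e.g. 2 when
    // tuning for VLEN=128). The vectorizer divides candidate loop costs by
    // the same factor, so the term cancels and the op looks comparable to
    // its fixed-length counterpart.
    uint64_t TuningCost = scalableOpCost(MinNumElts, /*VScale=*/2, 1);

    std::printf("upper-bound cost: %llu, tuning-based cost: %llu\n",
                (unsigned long long)UpperBoundCost,
                (unsigned long long)TuningCost);
    return 0;
  }
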
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128

; Note: +v implies a Zvl128b (i.e. minimal VLEN of 128), but as can be seen, we're currently
; not using that information unless an explicit vector width is set. (FIXME)

; A collection of fairly basic functional tests when both fixed and scalable vectorization is
; allowed. The primary goal of this is to check for crashes during cost modeling, but it also
; exercises the default heuristics in a useful way.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

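; For orientation (informal note, not something the test itself encodes): on
; RISC-V, vscale corresponds to VLEN / 64, so for example a machine with
; VLEN = 128 has vscale = 2, and a <vscale x 1 x i64> value occupies one full
; 128-bit register. The two RUN lines above differ only in whether an explicit
; minimum vector width is supplied to the backend (see the FIXME note).
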
define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
|
|
; VLENUNK-LABEL: @vector_add(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLENUNK: vector.ph:
|
|
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLENUNK: vector.body:
|
|
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
|
|
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
|
|
; VLENUNK-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; VLENUNK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
|
|
; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
|
|
; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; VLENUNK: middle.block:
|
|
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLENUNK: scalar.ph:
|
|
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
|
|
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
|
|
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: ret void
|
|
;
|
|
; VLEN128-LABEL: @vector_add(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLEN128: vector.ph:
|
|
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
|
|
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLEN128: vector.body:
|
|
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
|
|
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
|
|
; VLEN128-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; VLEN128-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
|
|
; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
|
|
; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; VLEN128: middle.block:
|
|
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLEN128: scalar.ph:
|
|
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
|
|
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
|
|
; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: ret void
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Same as above, but with op type of i32. We currently have a bug around
; etype=ELEN profitability in the vectorizer, and having a smaller element
; width test allows us to highlight different aspects of codegen.
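; Conceptual C-level equivalent of @vector_add_i32 (illustration only):
;   for (long i = 0; i < 1024; i++)
;     a[i] += v;   // a is i32*, v is i32
; The CHECK lines below show this loop being vectorized at <vscale x 2 x i32>
; with an interleave count of 2.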
define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
|
|
; VLENUNK-LABEL: @vector_add_i32(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
|
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
|
|
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLENUNK: vector.ph:
|
|
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
|
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
|
|
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLENUNK: vector.body:
|
|
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
|
|
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
|
|
; VLENUNK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
|
|
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
|
|
; VLENUNK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
|
|
; VLENUNK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
|
|
; VLENUNK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
|
|
; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
|
|
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
|
|
; VLENUNK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
|
|
; VLENUNK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
|
|
; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
|
|
; VLENUNK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
|
|
; VLENUNK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; VLENUNK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
|
|
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
|
|
; VLENUNK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
|
|
; VLENUNK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
|
|
; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
|
|
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
|
|
; VLENUNK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
|
|
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
|
|
; VLENUNK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; VLENUNK: middle.block:
|
|
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLENUNK: scalar.ph:
|
|
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
|
|
; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
|
|
; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: ret void
|
|
;
|
|
; VLEN128-LABEL: @vector_add_i32(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
|
|
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
|
|
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLEN128: vector.ph:
|
|
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
|
|
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
|
|
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
|
|
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
|
|
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLEN128: vector.body:
|
|
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
|
|
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
|
|
; VLEN128-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
|
|
; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
|
|
; VLEN128-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
|
|
; VLEN128-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
|
|
; VLEN128-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
|
|
; VLEN128-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
|
|
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
|
|
; VLEN128-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
|
|
; VLEN128-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
|
|
; VLEN128-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
|
|
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
|
|
; VLEN128-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; VLEN128-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
|
|
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
|
|
; VLEN128-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
|
|
; VLEN128-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
|
|
; VLEN128-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
|
|
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
|
|
; VLEN128-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
|
|
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
|
|
; VLEN128-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; VLEN128: middle.block:
|
|
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLEN128: scalar.ph:
|
|
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
|
|
; VLEN128-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; VLEN128-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
|
|
; VLEN128-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: ret void
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %elem = load i32, ptr %arrayidx
  %add = add i32 %elem, %v
  store i32 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


; a[b[i]] += v, mostly to exercise scatter/gather costing
; TODO: Currently fails to vectorize due to a memory conflict
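; The conflict above is the possible loop-carried dependence through a[b[i]]:
; nothing guarantees the indices in b are distinct. For example, if
; b[0] == b[1], a scalar execution increments the same a[] element twice,
; while a naive gather/add/scatter in one vector iteration would read it once
; in both lanes and write back only a single increment.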
define void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
|
|
; VLENUNK-LABEL: @indexed_add(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
|
|
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
|
|
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
|
|
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
|
|
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
|
|
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: ret void
|
|
;
|
|
; VLEN128-LABEL: @indexed_add(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
|
|
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
|
|
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
|
|
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
|
|
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
|
|
; VLEN128-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: ret void
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %add = add i64 %elem, %v
  store i64 %add, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
|
|
; VLENUNK-LABEL: @indexed_store(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLENUNK: vector.ph:
|
|
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLENUNK: vector.body:
|
|
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
|
|
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
|
|
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
|
|
; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
|
|
; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
|
|
; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; VLENUNK: middle.block:
|
|
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLENUNK: scalar.ph:
|
|
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
|
|
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
|
|
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
|
|
; VLENUNK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: ret void
|
|
;
|
|
; VLEN128-LABEL: @indexed_store(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLEN128: vector.ph:
|
|
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
|
|
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLEN128: vector.body:
|
|
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
|
|
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
|
|
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
|
|
; VLEN128-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
|
|
; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
|
|
; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; VLEN128: middle.block:
|
|
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLEN128: scalar.ph:
|
|
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
|
|
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
|
|
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
|
|
; VLEN128-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: ret void
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
|
|
; VLENUNK-LABEL: @indexed_load(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLENUNK: vector.ph:
|
|
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLENUNK: vector.body:
|
|
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
|
|
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
|
|
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
|
|
; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> undef)
|
|
; VLENUNK-NEXT: [[TMP6]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
|
|
; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
|
|
; VLENUNK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; VLENUNK: middle.block:
|
|
; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[TMP6]])
|
|
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLENUNK: scalar.ph:
|
|
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
|
|
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
|
|
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
|
|
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
|
|
;
|
|
; VLEN128-LABEL: @indexed_load(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLEN128: vector.ph:
|
|
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLEN128: vector.body:
|
|
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]]
|
|
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
|
|
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_LOAD]]
|
|
; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP5]], i32 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i32 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> undef)
|
|
; VLEN128-NEXT: [[TMP6]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
|
|
; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
|
|
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; VLEN128: middle.block:
|
|
; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[TMP6]])
|
|
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLEN128: scalar.ph:
|
|
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
|
|
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
|
|
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
|
|
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
|
|
; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [ 0, %entry ], [ %sum.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
|
|
; VLENUNK-LABEL: @splat_int(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLENUNK: vector.ph:
|
|
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLENUNK: vector.body:
|
|
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
|
|
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLENUNK-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
|
|
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; VLENUNK: middle.block:
|
|
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLENUNK: scalar.ph:
|
|
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: ret void
|
|
;
|
|
; VLEN128-LABEL: @splat_int(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLEN128: vector.ph:
|
|
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
|
|
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLEN128: vector.body:
|
|
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
|
|
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
|
|
; VLEN128-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
|
|
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; VLEN128: middle.block:
|
|
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLEN128: scalar.ph:
|
|
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: ret void
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
|
|
; VLENUNK-LABEL: @splat_ptr(
|
|
; VLENUNK-NEXT: entry:
|
|
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLENUNK: vector.ph:
|
|
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
|
|
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLENUNK: vector.body:
|
|
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
|
|
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
|
|
; VLENUNK-NEXT: store <vscale x 1 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
|
|
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; VLENUNK: middle.block:
|
|
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLENUNK: scalar.ph:
|
|
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLENUNK: for.body:
|
|
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; VLENUNK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
|
|
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; VLENUNK: for.end:
|
|
; VLENUNK-NEXT: ret void
|
|
;
|
|
; VLEN128-LABEL: @splat_ptr(
|
|
; VLEN128-NEXT: entry:
|
|
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
|
|
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; VLEN128: vector.ph:
|
|
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
|
|
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
|
|
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
|
|
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
|
|
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; VLEN128: vector.body:
|
|
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
|
|
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
|
|
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
|
|
; VLEN128-NEXT: store <vscale x 1 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
|
|
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; VLEN128: middle.block:
|
|
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
|
|
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
|
|
; VLEN128: scalar.ph:
|
|
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
|
|
; VLEN128: for.body:
|
|
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
|
|
; VLEN128-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
|
|
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
|
|
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; VLEN128: for.end:
|
|
; VLEN128-NEXT: ret void
|
|
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}