Reverts llvm/llvm-project#109671 Reverting due to some performance regressions on neoverse-v1.
938 lines
52 KiB
LLVM
938 lines
52 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
|
|
; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
|
|
; REQUIRES: asserts
|
|
|
|
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;; Based on the following C code:
|
|
;;
|
|
;; void simple_histogram(int *buckets, unsigned *indices, int N) {
|
|
;; for (int i = 0; i < N; ++i)
|
|
;; buckets[indices[i]]++;
|
|
;; }
|
|
|
|
;; Confirm finding a histogram operation
|
|
; CHECK-LABEL: Checking a loop in 'simple_histogram'
|
|
; CHECK: LV: Checking for a histogram on: store i32 %inc, ptr %gep.bucket, align 4
|
|
; CHECK: LV: Found histogram for: store i32 %inc, ptr %gep.bucket, align 4
|
|
|
|
;; Confirm cost calculation for runtime checks
|
|
; CHECK-LABEL: LV: Checking a loop in 'simple_histogram_rtdepcheck'
|
|
; CHECK: Calculating cost of runtime checks:
|
|
; CHECK: Total cost of runtime checks:
|
|
; CHECK: LV: Minimum required TC for runtime checks to be profitable:
|
|
|
|
;; Confirm inability to vectorize with potential alias to buckets
|
|
; CHECK-LABEL: LV: Checking a loop in 'simple_histogram_unsafe_alias'
|
|
; CHECK: LV: Can't vectorize due to memory conflicts
|
|
; CHECK-NEXT: LV: Not vectorizing: Cannot prove legality.
|
|
|
|
define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
define void @simple_histogram_inc_param(ptr noalias %buckets, ptr readonly %indices, i64 %N, i32 %incval) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_inc_param(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]], i32 [[INCVAL:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 [[INCVAL]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], [[INCVAL]]
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, %incval
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
define void @simple_histogram_sub(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_sub(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 -1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP12]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], -1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = sext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = sub nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices, ptr readonly %conds, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @conditional_histogram(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], ptr readonly [[CONDS:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP6]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP3]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4
|
|
; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 5100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> [[TMP13]])
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[NEXT:%.*]] ]
|
|
; CHECK-NEXT: [[CONDIDX:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[IV1]]
|
|
; CHECK-NEXT: [[CONDDATA:%.*]] = load i32, ptr [[CONDIDX]], align 4
|
|
; CHECK-NEXT: [[IFCOND:%.*]] = icmp sgt i32 [[CONDDATA]], 5100
|
|
; CHECK-NEXT: br i1 [[IFCOND]], label [[IFTRUE:%.*]], label [[NEXT]]
|
|
; CHECK: iftrue:
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP1]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX3]], align 4
|
|
; CHECK-NEXT: br label [[NEXT]]
|
|
; CHECK: next:
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %next ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%condidx = getelementptr inbounds i32, ptr %conds, i64 %iv
|
|
%conddata = load i32, ptr %condidx, align 4
|
|
%ifcond = icmp sgt i32 %conddata, 5100
|
|
br i1 %ifcond, label %iftrue, label %next
|
|
|
|
iftrue:
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
br label %next
|
|
|
|
next:
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @histogram_8bit(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP9]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP6]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i8(<vscale x 4 x ptr> [[TMP7]], i8 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
|
|
; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i8 [[TMP1]], 1
|
|
; CHECK-NEXT: store i8 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i8, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i8, ptr %gep.bucket, align 4
|
|
%inc = add nsw i8 %l.bucket, 1
|
|
store i8 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; We don't currently support floating point histograms.
|
|
define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @histogram_float(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
|
|
; CHECK-NEXT: store float [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds float, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load float, ptr %gep.bucket, align 4
|
|
%inc = fadd fast float %l.bucket, 1.0
|
|
store float %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; We don't support histograms with a update value that's not loop-invariant.
|
|
define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %indices, ptr readonly %incvals, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @histogram_varying_increment(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], ptr readonly [[INCVALS:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INCIDX:%.*]] = getelementptr inbounds i32, ptr [[INCVALS]], i64 [[IV]]
|
|
; CHECK-NEXT: [[INCVAL:%.*]] = load i32, ptr [[INCIDX]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], [[INCVAL]]
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%gep.incvals = getelementptr inbounds i32, ptr %incvals, i64 %iv
|
|
%l.incval = load i32, ptr %gep.incvals, align 4
|
|
%inc = add nsw i32 %l.bucket, %l.incval
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; Test that interleaving works when vectorizing.
|
|
define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_user_interleave(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP15]], 4
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[DOTIDX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP17]], align 4
|
|
; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP19]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP21]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !0
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; Test that we can handle more than one GEP index.
|
|
@idx_array = dso_local local_unnamed_addr global [1048576 x i32] zeroinitializer, align 4
|
|
@data_array = dso_local local_unnamed_addr global [1048576 x i32] zeroinitializer, align 4
|
|
|
|
define void @histogram_array_3op_gep(i64 noundef %N) #0 {
|
|
; CHECK-LABEL: define void @histogram_array_3op_gep(
|
|
; CHECK-SAME: i64 noundef [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1048576 x i32], ptr @idx_array, i64 0, i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
|
|
; CHECK-NEXT: [[TMP14:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD1]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, <vscale x 4 x i64> [[TMP14]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP11]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1048576 x i32], ptr @idx_array, i64 0, i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP9]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, i64 [[IDXPROM5]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds [1048576 x i32], ptr @idx_array, i64 0, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom5 = sext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, i64 %idxprom5
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; Add a struct into the mix, use a different constant index.
|
|
;; { unused, buckets }
|
|
%somestruct = type { [1048576 x i32], [1048576 x i32] }
|
|
|
|
define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr readonly %indices, ptr noalias %data.struct) #0 {
|
|
; CHECK-LABEL: define void @histogram_array_4op_gep_nonzero_const_idx(
|
|
; CHECK-SAME: i64 noundef [[N:%.*]], ptr readonly [[INDICES:%.*]], ptr noalias [[DATA_STRUCT:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = sext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[SOMESTRUCT:%.*]], ptr [[DATA_STRUCT]], i64 1, i32 0, <vscale x 4 x i64> [[TMP6]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP7]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
|
|
; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
|
|
; CHECK-NEXT: [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
|
|
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[L_IDX]] to i64
|
|
; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds [[SOMESTRUCT]], ptr [[DATA_STRUCT]], i64 1, i32 0, i64 [[IDXPROM5]]
|
|
; CHECK-NEXT: [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[L_BUCKET]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom5 = sext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds %somestruct, ptr %data.struct, i32 1, i32 0, i64 %idxprom5
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; Make sure the histogram intrinsic uses the active lane mask when tail folding.
|
|
define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_tailfold(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP2]], 2
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP5]])
|
|
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
|
|
; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
|
|
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]])
|
|
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
|
|
; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !2
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; Check that we can still vectorize a histogram when LAA found another dependency
|
|
;; that doesn't conflict with the buckets.
|
|
define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_rtdepcheck(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr [[ARRAY:%.*]], ptr [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
|
; CHECK: vector.memcheck:
|
|
; CHECK-NEXT: [[ARRAY1:%.*]] = ptrtoint ptr [[ARRAY]] to i64
|
|
; CHECK-NEXT: [[INDICES2:%.*]] = ptrtoint ptr [[INDICES]] to i64
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[ARRAY1]], [[INDICES2]]
|
|
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]]
|
|
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP6]], -4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
|
|
; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP8]] to i32
|
|
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
|
|
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4
|
|
; CHECK-NEXT: [[TMP13:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP13]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP14]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
|
|
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC_IND]], ptr [[TMP15]], align 4
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP17]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IDX_ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IV]]
|
|
; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
|
|
; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%idx.addr = getelementptr inbounds i32, ptr %array, i64 %iv
|
|
%iv.trunc = trunc i64 %iv to i32
|
|
store i32 %iv.trunc, ptr %idx.addr, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
;; Make sure we don't vectorize if there's a potential alias between buckets
|
|
;; and indices.
|
|
define void @simple_histogram_unsafe_alias(ptr %buckets, ptr %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_unsafe_alias(
|
|
; CHECK-SAME: ptr [[BUCKETS:%.*]], ptr [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
|
|
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
|
|
%l.idx = load i32, ptr %gep.indices, align 4
|
|
%idxprom1 = zext i32 %l.idx to i64
|
|
%gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
|
|
%l.bucket = load i32, ptr %gep.bucket, align 4
|
|
%inc = add nsw i32 %l.bucket, 1
|
|
store i32 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
|
|
; CHECK-LABEL: define void @simple_histogram_64b(
|
|
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -2
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 1
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[INDICES]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP5]], align 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[BUCKETS]], <vscale x 2 x i64> [[WIDE_LOAD]]
|
|
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
|
|
; CHECK: scalar.ph:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i64, ptr [[INDICES]], i64 [[IV]]
|
|
; CHECK-NEXT: [[L_IDX:%.*]] = load i64, ptr [[GEP_INDICES]], align 4
|
|
; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds i64, ptr [[BUCKETS]], i64 [[L_IDX]]
|
|
; CHECK-NEXT: [[L_BUCKET:%.*]] = load i64, ptr [[GEP_BUCKET]], align 4
|
|
; CHECK-NEXT: [[INC:%.*]] = add nsw i64 [[L_BUCKET]], 1
|
|
; CHECK-NEXT: store i64 [[INC]], ptr [[GEP_BUCKET]], align 4
|
|
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
|
|
; CHECK: for.exit:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
|
%gep.indices = getelementptr inbounds i64, ptr %indices, i64 %iv
|
|
%l.idx = load i64, ptr %gep.indices, align 4
|
|
%gep.bucket = getelementptr inbounds i64, ptr %buckets, i64 %l.idx
|
|
%l.bucket = load i64, ptr %gep.bucket, align 4
|
|
%inc = add nsw i64 %l.bucket, 1
|
|
store i64 %inc, ptr %gep.bucket, align 4
|
|
%iv.next = add nuw nsw i64 %iv, 1
|
|
%exitcond = icmp eq i64 %iv.next, %N
|
|
br i1 %exitcond, label %for.exit, label %for.body, !llvm.loop !4
|
|
|
|
for.exit:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }
|
|
|
|
!0 = distinct !{!0, !1}
|
|
!1 = !{!"llvm.loop.interleave.count", i32 2}
|
|
!2 = distinct !{!2, !3}
|
|
!3 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
|
|
!4 = distinct !{!4, !5}
|
|
!5 = !{!"llvm.loop.interleave.count", i32 1}
|