Implement VPBlendRecipe::computeCost. VPBlendRecipe is currently also used if only the first lane is used. This also requires pre-computing costs for forced scalars and instructions considered profitable to scalarize. For those, the cost will be computed separately in the legacy cost model. This will also be needed when implementing VPReplicateRecipe::computeCost.
121 lines
6.0 KiB
LLVM
121 lines
6.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -p loop-vectorize -S %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; Test for https://github.com/llvm/llvm-project/issues/111040
|
|
; Test input: a loop whose latch phi %p merges the constant 1 (incoming from
; %loop.header) with the result of an llvm.smax call on a urem computed in the
; conditionally executed %else block. The branch condition %c and both urem
; operands (%mul and the argument %x) are defined outside the loop.
; The expected-output assertions come first; the input IR starts at `entry:`.
define void @smax_call_uniform(ptr %dst, i64 %x) {
|
|
; CHECK-LABEL: define void @smax_call_uniform(
|
|
; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
|
|
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
|
|
; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
|
|
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
|
|
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
|
|
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
|
|
; CHECK: [[PRED_UREM_IF]]:
|
|
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
|
|
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
|
|
; CHECK: [[PRED_UREM_CONTINUE]]:
|
|
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[REM]], %[[PRED_UREM_IF]] ]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
|
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
|
|
; CHECK: [[PRED_UREM_IF1]]:
|
|
; CHECK-NEXT: [[TMP6:%.*]] = urem i64 [[MUL]], [[X]]
|
|
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]]
|
|
; CHECK: [[PRED_UREM_CONTINUE2]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
|
|
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]]
|
|
; CHECK: [[PRED_UREM_IF3]]:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[MUL]], [[X]]
|
|
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]]
|
|
; CHECK: [[PRED_UREM_CONTINUE4]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP8]], %[[PRED_UREM_IF3]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
|
|
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]]
|
|
; CHECK: [[PRED_UREM_IF5]]:
|
|
; CHECK-NEXT: [[TMP11:%.*]] = urem i64 [[MUL]], [[X]]
|
|
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]]
|
|
; CHECK: [[PRED_UREM_CONTINUE6]]:
|
|
; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0)
|
|
; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0)
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
|
|
; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1
|
|
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
|
|
; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1
|
|
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1
|
|
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
|
|
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
|
|
; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
|
|
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
|
|
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
|
|
; CHECK: [[LOOP_HEADER]]:
|
|
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
|
|
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
|
|
; CHECK: [[ELSE]]:
|
|
; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]]
|
|
; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM1]], i64 0)
|
|
; CHECK-NEXT: br label %[[LOOP_LATCH]]
|
|
; CHECK: [[LOOP_LATCH]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
|
|
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1
|
|
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8
|
|
; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
|
|
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 0
|
|
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Unsigned i8 compare of 188 (-68) against 187 (-69), which is false.
%c = icmp ult i8 -68, -69
|
|
; %x multiplied by the constant 0.
%mul = mul nsw nuw i64 %x, 0
|
|
br label %loop.header
|
|
|
|
loop.header:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
|
|
; Go straight to the latch when %c is true, otherwise run %else first.
br i1 %c, label %loop.latch, label %else
|
|
|
|
else:
|
|
; Both operands are defined outside the loop: %mul in %entry, %x as an argument.
%rem = urem i64 %mul, %x
|
|
%smax = tail call i64 @llvm.smax.i64(i64 %rem, i64 0)
|
|
br label %loop.latch
|
|
|
|
loop.latch:
|
|
; 1 when arriving from %loop.header, the smax result when arriving from %else.
%p = phi i64 [ 1, %loop.header ], [ %smax, %else ]
|
|
%add = add i64 %p, 1
|
|
; The store address is indexed by %add (derived from %p), not by %iv.
%gep = getelementptr i64, ptr %dst, i64 %add
|
|
store i64 0, ptr %gep, align 8
|
|
%iv.next = add i64 %iv, 1
|
|
; The loop exits once %iv.next equals 0; %iv starts at 0 and steps by 1.
%ec = icmp eq i64 %iv.next, 0
|
|
br i1 %ec, label %exit, label %loop.header
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Declaration of the i64 smax intrinsic called from @smax_call_uniform.
declare i64 @llvm.smax.i64(i64, i64)
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
;.
|