Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
Florian Hahn 1de3dc7d23 [LV] Bail out early if BTC+1 wraps.
Currently we fail to detect the case where BTC + 1 wraps, i.e. the
vector trip count is 0, In those cases, the minimum iteration count
check will fail, and the vector code will never be executed.

Explicitly check for this condition in computeMaxVF and avoid trying to
vectorize alltogether.

Note that a number of tests needed to be updated, because the vector
loop would never be executed given the input IR.

Fixes https://github.com/llvm/llvm-project/issues/122558.
2025-01-14 22:07:38 +00:00

120 lines
6.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Test for https://github.com/llvm/llvm-project/issues/111040
define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-LABEL: define void @smax_call_uniform(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 -68, -69
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[X]], 0
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
; CHECK: [[PRED_UREM_IF]]:
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
; CHECK: [[PRED_UREM_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[REM]], %[[PRED_UREM_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
; CHECK: [[PRED_UREM_IF1]]:
; CHECK-NEXT: [[TMP6:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE2]]
; CHECK: [[PRED_UREM_CONTINUE2]]:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_UREM_IF3:.*]], label %[[PRED_UREM_CONTINUE4:.*]]
; CHECK: [[PRED_UREM_IF3]]:
; CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE4]]
; CHECK: [[PRED_UREM_CONTINUE4]]:
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ poison, %[[PRED_UREM_CONTINUE2]] ], [ [[TMP8]], %[[PRED_UREM_IF3]] ]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_UREM_IF5:.*]], label %[[PRED_UREM_CONTINUE6]]
; CHECK: [[PRED_UREM_IF5]]:
; CHECK-NEXT: [[TMP11:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]]
; CHECK: [[PRED_UREM_CONTINUE6]]:
; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0)
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM1]], i64 0)
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8
; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 1024
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
%c = icmp ult i8 -68, -69
%mul = mul nsw nuw i64 %x, 0
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
br i1 %c, label %loop.latch, label %else
else:
%rem = urem i64 %mul, %x
%smax = tail call i64 @llvm.smax.i64(i64 %rem, i64 0)
br label %loop.latch
loop.latch:
%p = phi i64 [ 1, %loop.header ], [ %smax, %else ]
%add = add i64 %p, 1
%gep = getelementptr i64, ptr %dst, i64 %add
store i64 0, ptr %gep, align 8
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
declare i64 @llvm.smax.i64(i64, i64)
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.