In LoopVectorizationCostModel::getInstructionCost(), when canTruncateToMinimalBitwidth() is satisfied and the instruction is a trunc, the source type is computed as the smaller of the source and destination vector types, and the destination type is computed as the larger of the instruction's type and the destination type. This is a logical error: the original source vector type could be smaller than the original destination vector type, in which case the trunc semantics are broken because we would be attempting to widen. Fixes #47665.
119 lines
6.6 KiB
LLVM
119 lines
6.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s

; Scalar input loop for the loop vectorizer.
;
; %iv starts at 0 and the latch in for.body leaves the loop as soon as
; `icmp slt i32 %iv, 2` is false. Every iteration stores one i8 to %p. The
; stored value is (%cond << 8) truncated to i8, where %cond is
;   - zext of %b to i32 when %iv < 2 (path through cond.false), or
;   - trunc to i32 of ((%a << 48) >>s 52) otherwise (straight from for.cond).
;
; The assertions pin the expected output: a 4-wide vector body guarded by
; @llvm.get.active.lane.mask, with the i64 -> i32 and i32 -> i8 truncs kept as
; vector truncs and the store emitted as four per-lane predicated stores,
; followed by the original scalar loop.
define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], <i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 48, i64 48, i64 48>
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], <i64 52, i64 52, i64 52, i64 52>
; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], <i32 8, i32 8, i32 8, i32 8>
; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
; CHECK-NEXT:    store i8 [[TMP10]], ptr [[P]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK:       pred.store.if3:
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
; CHECK-NEXT:    store i8 [[TMP12]], ptr [[P]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; CHECK:       pred.store.continue4:
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK:       pred.store.if5:
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
; CHECK-NEXT:    store i8 [[TMP14]], ptr [[P]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
; CHECK:       pred.store.continue6:
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
; CHECK:       pred.store.if7:
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
; CHECK-NEXT:    store i8 [[TMP16]], ptr [[P]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
; CHECK:       pred.store.continue8:
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[ADD]] = add i32 [[IV]], 1
; CHECK-NEXT:    [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2
; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[A]], 48
; CHECK-NEXT:    [[ASHR:%.*]] = ashr i64 [[SHL]], 52
; CHECK-NEXT:    [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32
; CHECK-NEXT:    br i1 [[CMP_SLT]], label [[COND_FALSE:%.*]], label [[FOR_BODY]]
; CHECK:       cond.false:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[B]] to i32
; CHECK-NEXT:    br label [[FOR_BODY]]
; CHECK:       for.body:
; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], [[FOR_COND]] ], [ [[ZEXT]], [[COND_FALSE]] ]
; CHECK-NEXT:    [[SHL_I32:%.*]] = shl i32 [[COND]], 8
; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
; CHECK-NEXT:    store i8 [[TRUNC]], ptr [[P]], align 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 2
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry
  %iv = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %add = add i32 %iv, 1
  %cmp.slt = icmp slt i32 %iv, 2
  ; Loop-invariant i64 computation on %a that is narrowed to i32 here.
  %shl = shl i64 %a, 48
  %ashr = ashr i64 %shl, 52
  %trunc.i32 = trunc i64 %ashr to i32
  br i1 %cmp.slt, label %cond.false, label %for.body

cond.false:                                       ; preds = %for.cond
  %zext = zext i8 %b to i32
  br label %for.body

for.body:                                         ; preds = %cond.false, %for.cond
  %cond = phi i32 [ %trunc.i32, %for.cond ], [ %zext, %cond.false ]
  ; Second narrowing step: i32 -> i8 after the shift, then the only store.
  %shl.i32 = shl i32 %cond, 8
  %trunc = trunc i32 %shl.i32 to i8
  store i8 %trunc, ptr %p, align 1
  ; Latch: continue while the pre-increment %iv is below 2.
  %cmp = icmp slt i32 %iv, 2
  br i1 %cmp, label %for.cond, label %exit

exit:                                             ; preds = %for.body
  ret void
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.