Files
clang-p2996/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
Florian Hahn 6ef829941b Recommit "[VPlan] Replace disjoint or with add instead of dropping disjoint. (#83821)"
Recommit with a fix for the use-after-free causing the revert.
This reverts the revert commit f872043e05.

Original commit message:

Dropping disjoint from an OR may yield incorrect results, as some
analysis may have converted it to an Add implicitly (e.g. SCEV used for
dependence analysis). Instead, replace it with an equivalent Add.

This is possible as all users of the disjoint OR only access lanes where
the operands are disjoint or poison otherwise.

Note that replacing all disjoint ORs with ADDs instead of dropping the
flags is not strictly necessary. It is only needed for disjoint ORs that
SCEV treated as ADDs, but those are not tracked.

There are other places that may drop poison-generating flags; those
likely need similar treatment.

Fixes https://github.com/llvm/llvm-project/issues/81872

PR: https://github.com/llvm/llvm-project/pull/83821
2024-03-27 19:11:18 +00:00

108 lines
5.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
; Target configuration: the module is built for the x86_64 Linux triple with
; an explicit data layout string.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Regression test for PR 81872.
; The `or disjoint` in the loop below is only disjoint because of the
; dominating condition (%iv is even). If the vectorizer merely drops the
; disjoint flag to avoid poison, we get a miscompile: the array index, and
; thereby the element that is stored to (arr[99], arr[98]), is calculated
; incorrectly, because the plain `or` is no longer equivalent to the `add`.
; The vectorized code is therefore expected to use an `add` instead.
;
define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr noundef align 8 dereferenceable_or_null(16) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb5:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 99, i64 98, i64 97, i64 96>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 99, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]]
; CHECK: bb18:
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]]
; CHECK-NEXT: store i64 1, ptr [[GETELEMENTPTR19]], align 8
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: bb6:
; CHECK-NEXT: ret void
;
; Input IR: a down-counting loop that, for every even %iv, stores 1 to
; arr[%iv | 1]. The autogenerated assertions above expect the vector body to
; compute that index with `add i64 ..., 1`, while the scalar remainder loop
; keeps the original `or disjoint`.
bb5:
; Entry block: branches unconditionally into the loop.
br label %loop.header
loop.header: ; preds = %loop.latch, %bb5
; %iv starts at 99 and is decremented by 1 in loop.latch.
%iv = phi i64 [ 99, %bb5 ], [ %iv.next, %loop.latch ]
; Test bit 0 of %iv; bb18 is entered only when it is clear (%iv is even).
%and = and i64 %iv, 1
%icmp17 = icmp eq i64 %and, 0
br i1 %icmp17, label %bb18, label %loop.latch, !prof !21
bb18: ; preds = %loop.header
; Bit 0 of %iv is zero on this path, so the operands of the `or` share no set
; bits and the result equals %iv + 1. The `disjoint` flag is justified only
; by the dominating branch in loop.header.
%or = or disjoint i64 %iv, 1
; Store 1 to the i64 element arr[%or].
%getelementptr19 = getelementptr inbounds i64, ptr %arr, i64 %or
store i64 1, ptr %getelementptr19, align 8
br label %loop.latch
loop.latch: ; preds = %bb18, %loop.header
; Count down; the loop exits once %iv.next equals 90, so %iv takes the values
; 99 down to 91 (9 iterations).
%iv.next = add nsw i64 %iv, -1
%icmp22 = icmp eq i64 %iv.next, 90
br i1 %icmp22, label %bb6, label %loop.header, !prof !22
bb6:
ret void
}
; Attribute group #0 (used by @test): selects the haswell CPU with AVX2.
attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
; NOTE(review): !4, !10, !16 and !17 are not referenced anywhere in this file;
; presumably leftovers from reducing the original reproducer - confirm before
; removing.
!4 = !{}
!10 = !{i32 1}
!16 = !{i64 864}
!17 = !{i64 8}
; Branch weights attached to the conditional branches in loop.header (!21)
; and loop.latch (!22).
!21 = !{!"branch_weights", i32 1, i32 1}
!22 = !{!"branch_weights", i32 1, i32 95}
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23}
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1}
; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
;.