Files
clang-p2996/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
Juneyoung Lee ed253ef772 [LoopVectorize] Fix VPRecipeBuilder::createEdgeMask to correctly generate the mask
This patch fixes pr48832 by correctly generating the mask when a poison value is involved.

Consider this CFG (which is a part of the input):

```
for.body:                                         ; preds = %for.cond
  br i1 true, label %cond.false, label %land.rhs

land.rhs:                                         ; preds = %for.body
  br i1 poison, label %cond.end, label %cond.false

cond.false:                                       ; preds = %for.body, %land.rhs
  br label %cond.end

cond.end:                                         ; preds = %land.rhs, %cond.false
  %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ]

```

The path for.body -> land.rhs -> cond.end should be taken when 'select i1 false, i1 poison, i1 false' holds (which means it's never taken); but VPRecipeBuilder::createEdgeMask was emitting 'and i1 false, poison' instead.
The former one successfully blocks poison propagation whereas the latter one doesn't, making the condition poison and thus causing the miscompilation.

SimplifyCFG has a similar bug (which didn't expose a real-world bug yet), and a patch for this is also ongoing (see https://reviews.llvm.org/D95026).

Reviewed By: bjope

Differential Revision: https://reviews.llvm.org/D95217
2021-02-14 21:12:34 +09:00

278 lines
12 KiB
LLVM

; RUN: opt -S -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s
; RUN: opt -S -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NO-VF
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Test predication of non-void instructions, specifically (i) that these
; instructions permit vectorization and (ii) the creation of an insertelement
; and a Phi node. We check the full 2-element sequence for the first
; instruction; For the rest we'll just make sure they get predicated based
; on the code generated for the first element.
define void @test(i32* nocapture %asd, i32* nocapture %aud,
i32* nocapture %asr, i32* nocapture %aur) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %if.end
ret void
; CHECK-LABEL: test
; CHECK: vector.body:
; CHECK: %[[SDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[SDEE]], label %[[CSD:[a-zA-Z0-9.]+]], label %[[ESD:[a-zA-Z0-9.]+]]
; CHECK: [[CSD]]:
; CHECK: %[[SDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]
; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SD0]], i32 0
; CHECK: br label %[[ESD]]
; CHECK: [[ESD]]:
; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[SD1]], %[[CSD]] ]
; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1
; CHECK: br i1 %[[SDEEH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]
; CHECK: [[CSDH]]:
; CHECK: %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1
; CHECK: %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]]
; CHECK: %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32 %[[SD0H]], i32 1
; CHECK: br label %[[ESDH]]
; CHECK: [[ESDH]]:
; CHECK: %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]], %[[CSDH]] ]
; CHECK: %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[UDEE]], label %[[CUD:[a-zA-Z0-9.]+]], label %[[EUD:[a-zA-Z0-9.]+]]
; CHECK: [[CUD]]:
; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]
; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UD0]], i32 0
; CHECK: br label %[[EUD]]
; CHECK: [[EUD]]:
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]
; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[SREE]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]]
; CHECK: [[CSR]]:
; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]
; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[SR0]], i32 0
; CHECK: br label %[[ESR]]
; CHECK: [[ESR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]
; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
; CHECK: br i1 %[[UREE]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]]
; CHECK: [[CUR]]:
; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0
; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]
; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> poison, i32 %[[UR0]], i32 0
; CHECK: br label %[[EUR]]
; CHECK: [[EUR]]:
; CHECK: %{{.*}} = phi <2 x i32> [ poison, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
%isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
%iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv
%isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv
%iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv
%lsd = load i32, i32* %isd, align 4
%lud = load i32, i32* %iud, align 4
%lsr = load i32, i32* %isr, align 4
%lur = load i32, i32* %iur, align 4
%psd = add nsw i32 %lsd, 23
%pud = add nsw i32 %lud, 24
%psr = add nsw i32 %lsr, 25
%pur = add nsw i32 %lur, 26
%cmp1 = icmp slt i32 %lsd, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
%rsd = sdiv i32 %psd, %lsd
%rud = udiv i32 %pud, %lud
%rsr = srem i32 %psr, %lsr
%rur = urem i32 %pur, %lur
br label %if.end
if.end: ; preds = %if.then, %for.body
%ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
%yud.0 = phi i32 [ %rud, %if.then ], [ %pud, %for.body ]
%ysr.0 = phi i32 [ %rsr, %if.then ], [ %psr, %for.body ]
%yur.0 = phi i32 [ %rur, %if.then ], [ %pur, %for.body ]
store i32 %ysd.0, i32* %isd, align 4
store i32 %yud.0, i32* %iud, align 4
store i32 %ysr.0, i32* %isr, align 4
store i32 %yur.0, i32* %iur, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 128
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
define void @test_scalar2scalar(i32* nocapture %asd, i32* nocapture %bsd) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %if.end
ret void
; CHECK-LABEL: test_scalar2scalar
; CHECK: vector.body:
; CHECK: br i1 %{{.*}}, label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]
; CHECK: [[THEN]]:
; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
; CHECK: br label %[[FI]]
; CHECK: [[FI]]:
; CHECK: %{{.*}} = phi i32 [ poison, %vector.body ], [ %[[PD]], %[[THEN]] ]
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
%isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
%lsd = load i32, i32* %isd, align 4
%isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv
%lsd.b = load i32, i32* %isd.b, align 4
%psd = add nsw i32 %lsd, 23
%cmp1 = icmp slt i32 %lsd, 100
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
%sd1 = sdiv i32 %psd, %lsd
%rsd = sdiv i32 %lsd.b, %sd1
br label %if.end
if.end: ; preds = %if.then, %for.body
%ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
store i32 %ysd.0, i32* %isd, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 128
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
define void @pr30172(i32* nocapture %asd, i32* nocapture %bsd) {
entry:
br label %for.body
for.cond.cleanup: ; preds = %if.end
ret void
; CHECK-LABEL: pr30172
; CHECK: vector.body:
; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], <i32 100, i32 100>
; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], <i32 200, i32 200>
; CHECK: %[[NOT:.+]] = xor <2 x i1> %[[CMP1]], <i1 true, i1 true>
; CHECK: %[[AND:.+]] = select <2 x i1> %[[NOT]], <2 x i1> %[[CMP2]], <2 x i1> zeroinitializer
; CHECK: %[[OR:.+]] = or <2 x i1> %[[AND]], %[[CMP1]]
; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR]], i32 0
; CHECK: br i1 %[[EXTRACT]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]
; CHECK: [[THEN]]:
; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
; CHECK: br label %[[FI]]
; CHECK: [[FI]]:
; CHECK: %{{.*}} = phi i32 [ poison, %vector.body ], [ %[[PD]], %[[THEN]] ]
for.body: ; preds = %if.end, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
%isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
%lsd = load i32, i32* %isd, align 4
%isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv
%lsd.b = load i32, i32* %isd.b, align 4
%psd = add nsw i32 %lsd, 23
%cmp1 = icmp slt i32 %lsd, 100
br i1 %cmp1, label %if.then, label %check
check: ; preds = %for.body
%cmp2 = icmp sge i32 %lsd, 200
br i1 %cmp2, label %if.then, label %if.end
if.then: ; preds = %check, %for.body
%sd1 = sdiv i32 %psd, %lsd
%rsd = sdiv i32 %lsd.b, %sd1
br label %if.end
if.end: ; preds = %if.then, %check
%ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %check ]
store i32 %ysd.0, i32* %isd, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 128
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) {
entry:
br label %for.body
; CHECK-LABEL: predicated_udiv_scalarized_operand
; CHECK: vector.body:
; CHECK: %wide.load = load <2 x i32>, <2 x i32>* {{.*}}, align 4
; CHECK: br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
; CHECK: [[IF0]]:
; CHECK: %[[T00:.+]] = extractelement <2 x i32> %wide.load, i32 0
; CHECK: %[[T01:.+]] = add nsw i32 %[[T00]], %x
; CHECK: %[[T02:.+]] = extractelement <2 x i32> %wide.load, i32 0
; CHECK: %[[T03:.+]] = udiv i32 %[[T02]], %[[T01]]
; CHECK: %[[T04:.+]] = insertelement <2 x i32> poison, i32 %[[T03]], i32 0
; CHECK: br label %[[CONT0]]
; CHECK: [[CONT0]]:
; CHECK: %[[T05:.+]] = phi <2 x i32> [ poison, %vector.body ], [ %[[T04]], %[[IF0]] ]
; CHECK: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
; CHECK: [[IF1]]:
; CHECK: %[[T06:.+]] = extractelement <2 x i32> %wide.load, i32 1
; CHECK: %[[T07:.+]] = add nsw i32 %[[T06]], %x
; CHECK: %[[T08:.+]] = extractelement <2 x i32> %wide.load, i32 1
; CHECK: %[[T09:.+]] = udiv i32 %[[T08]], %[[T07]]
; CHECK: %[[T10:.+]] = insertelement <2 x i32> %[[T05]], i32 %[[T09]], i32 1
; CHECK: br label %[[CONT1]]
; CHECK: [[CONT1]]:
; CHECK: phi <2 x i32> [ %[[T05]], %[[CONT0]] ], [ %[[T10]], %[[IF1]] ]
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
; Test predicating an instruction that feeds a vectorizable use, when unrolled
; but not vectorized. Derived from pr34248 reproducer.
;
; UNROLL-NO-VF-LABEL: predicated_udiv_scalarized_operand
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF: %[[LOAD0:.+]] = load i32, i32*
; UNROLL-NO-VF: %[[LOAD1:.+]] = load i32, i32*
; UNROLL-NO-VF: br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
; UNROLL-NO-VF: [[IF0]]:
; UNROLL-NO-VF: %[[ADD0:.+]] = add nsw i32 %[[LOAD0]], %x
; UNROLL-NO-VF: %[[DIV0:.+]] = udiv i32 %[[LOAD0]], %[[ADD0]]
; UNROLL-NO-VF: br label %[[CONT0]]
; UNROLL-NO-VF: [[CONT0]]:
; UNROLL-NO-VF: phi i32 [ poison, %vector.body ], [ %[[DIV0]], %[[IF0]] ]
; UNROLL-NO-VF: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
; UNROLL-NO-VF: [[IF1]]:
; UNROLL-NO-VF: %[[ADD1:.+]] = add nsw i32 %[[LOAD1]], %x
; UNROLL-NO-VF: %[[DIV1:.+]] = udiv i32 %[[LOAD1]], %[[ADD1]]
; UNROLL-NO-VF: br label %[[CONT1]]
; UNROLL-NO-VF: [[CONT1]]:
; UNROLL-NO-VF: phi i32 [ poison, %[[CONT0]] ], [ %[[DIV1]], %[[IF1]] ]
; UNROLL-NO-VF: br i1 {{.*}}, label %middle.block, label %vector.body
;
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
%r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ]
%tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
%tmp2 = load i32, i32* %tmp0, align 4
br i1 %c, label %if.then, label %for.inc
if.then:
%tmp3 = add nsw i32 %tmp2, %x
%tmp4 = udiv i32 %tmp2, %tmp3
br label %for.inc
for.inc:
%tmp5 = phi i32 [ %tmp2, %for.body ], [ %tmp4, %if.then]
%tmp6 = add i32 %r, %tmp5
%i.next = add nuw nsw i64 %i, 1
%cond = icmp slt i64 %i.next, %n
br i1 %cond, label %for.body, label %for.end
for.end:
%tmp7 = phi i32 [ %tmp6, %for.inc ]
ret i32 %tmp7
}