As LoopPredication performs non-equivalent transforms that remove some checks from loops, other passes may be unable to perform transforms they could have performed had the checks been left in the loops. This patch makes LoopPredication insert assumes of the replaced conditions either after a guard call or in the true block of a widenable condition branch. Differential Revision: https://reviews.llvm.org/D135354
109 lines
4.9 KiB
LLVM
109 lines
4.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; Pipeline under test: loop-predication (wrapped in loop-mssa(...)), followed by
; gvn and simplifycfg. The extra option turns on insertion of assumes for the
; guard conditions that LoopPredication replaces. The output of opt (stderr is
; merged into stdout) is verified by FileCheck against the assertions in @test0.
; RUN: opt -S -passes='loop-mssa(loop-predication),gvn,simplifycfg' -loop-predication-insert-assumes-of-predicated-guards-conditions=true < %s 2>&1 | FileCheck %s
|
|
|
|
; Note: the layout string contains "ni:1" and "ni:2", i.e. it marks address
; spaces 1 and 2 as non-integral. All pointers used in @test0 are in the
; default address space.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; @test0: a loop whose guard is written as an explicit branch on
; (%cond.1 & widenable condition), followed in %loop.next by a second branch on
; the same %cond.1.
;
; What the assertions below expect after the pipeline:
;   * the in-loop guard branch tests a condition built in the preheader
;     (and of two icmps over the loaded bounds) combined with the widenable
;     condition, instead of %cond.1;
;   * %loop.next starts with a call to @llvm.assume on the original %cond.1;
;   * the %if.false path (store through %gep.4) is gone and only the %if.true
;     store remains, merged into %loop.next together with the latch.
;
; Of the parameters, only %p1, %p2 and %p4 are referenced by the body; %p3,
; %p5, %c and %x are unused.
define i32 @test0(i32* %p1, i8* %p2, i32* %p3, i8* %p4, i8* %p5, i1 %c, i32 %x) {
|
|
; CHECK-LABEL: @test0(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[P1_1:%.*]] = getelementptr i32, i32* [[P1:%.*]], i64 1
|
|
; CHECK-NEXT: [[P1_2:%.*]] = getelementptr i32, i32* [[P1]], i64 2
|
|
; CHECK-NEXT: [[P1_3:%.*]] = getelementptr i32, i32* [[P1]], i64 3
|
|
; CHECK-NEXT: [[IV_1_START:%.*]] = load i32, i32* [[P1_1]], align 4, !range [[RNG0:![0-9]+]]
|
|
; CHECK-NEXT: [[IV_1_END:%.*]] = load i32, i32* [[P1_2]], align 4, !range [[RNG0]]
|
|
; CHECK-NEXT: [[IV_2_END:%.*]] = load i32, i32* [[P1_3]], align 4, !range [[RNG0]]
|
|
; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_2_END]], [[IV_1_END]]
|
|
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
|
|
; CHECK: loop.preheader:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV_1_END]], -1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_1_START]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i32 [[IV_2_END]], [[TMP1]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IV_1_START]], [[IV_1_END]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = and i1 [[TMP3]], [[TMP2]]
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[IV_1_NEXT:%.*]], [[LOOP_NEXT:%.*]] ], [ [[IV_1_START]], [[LOOP_PREHEADER]] ]
|
|
; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], [[LOOP_NEXT]] ], [ 0, [[LOOP_PREHEADER]] ]
|
|
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, i8* [[P2:%.*]], i32 [[IV_1]]
|
|
; CHECK-NEXT: [[VALUE:%.*]] = load i8, i8* [[GEP_1]], align 1
|
|
; CHECK-NEXT: [[COND_1:%.*]] = icmp ult i32 [[IV_1]], [[IV_1_END]]
|
|
; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[WC]]
|
|
; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_NEXT]], label [[DEOPT:%.*]]
|
|
; CHECK: loop.next:
|
|
; CHECK-NEXT: call void @llvm.assume(i1 [[COND_1]])
|
|
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, i8* [[P4:%.*]], i32 [[IV_1]]
|
|
; CHECK-NEXT: store i8 [[VALUE]], i8* [[GEP_3]], align 1
|
|
; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i32 [[IV_1]], 1
|
|
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i32 [[IV_2]], 1
|
|
; CHECK-NEXT: [[LATCH_COND:%.*]] = icmp ult i32 [[IV_2]], [[IV_2_END]]
|
|
; CHECK-NEXT: br i1 [[LATCH_COND]], label [[LOOP]], label [[EXIT]]
|
|
; CHECK: deopt:
|
|
; CHECK-NEXT: [[DEOPT_RES:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
|
|
; CHECK-NEXT: ret i32 [[DEOPT_RES]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_2]], [[LOOP_NEXT]] ]
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
; Load the loop bounds from three consecutive i32 slots after %p1; each load
; carries the !range metadata defined at the end of the file.
entry:
|
|
%p1.1 = getelementptr i32, i32* %p1, i64 1
|
|
%p1.2 = getelementptr i32, i32* %p1, i64 2
|
|
%p1.3 = getelementptr i32, i32* %p1, i64 3
|
|
%iv.1.start = load i32, i32* %p1.1, !range !0
|
|
%iv.1.end = load i32, i32* %p1.2, !range !0
|
|
%iv.2.end = load i32, i32* %p1.3, !range !0
|
|
; The loop is entered only when %iv.2.end < %iv.1.end (unsigned).
%loop.cond = icmp ult i32 %iv.2.end, %iv.1.end
|
|
br i1 %loop.cond, label %loop, label %exit
|
|
|
|
; Two induction variables, both stepped by 1 in %latch: %iv.1 starts at the
; loaded %iv.1.start, %iv.2 starts at 0.
loop:
|
|
%iv.1 = phi i32 [ %iv.1.start, %entry ], [ %iv.1.next, %latch ]
|
|
%iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %latch ]
|
|
%gep.1 = getelementptr i8, i8* %p2, i32 %iv.1
|
|
%value = load i8, i8* %gep.1
|
|
; Guard condition on %iv.1; this is the condition the expected output
; replaces in the branch and re-introduces as an assume in %loop.next.
%cond.1 = icmp ult i32 %iv.1, %iv.1.end
|
|
%wc = call i1 @llvm.experimental.widenable.condition()
|
|
%explicit_guard_cond = and i1 %cond.1, %wc
|
|
; Guard in explicit branch form: failure goes to %deopt.
br i1 %explicit_guard_cond, label %loop.next, label %deopt
|
|
|
|
; Branches again on %cond.1, which was part of the guard condition that led
; here. The expected output no longer contains this branch.
loop.next:
|
|
br i1 %cond.1, label %if.true, label %if.false
|
|
|
|
; Store indexed by %iv.1; this is the store that survives in the expected output.
if.true:
|
|
%gep.3 = getelementptr i8, i8* %p4, i32 %iv.1
|
|
store i8 %value, i8* %gep.3
|
|
br label %latch
|
|
|
|
; Store indexed by %iv.2, taken only when %cond.1 is false; this path does not
; appear in the expected output.
if.false:
|
|
%gep.4 = getelementptr i8, i8* %p4, i32 %iv.2
|
|
store i8 %value, i8* %gep.4
|
|
br label %latch
|
|
|
|
latch:
|
|
%iv.1.next = add nuw nsw i32 %iv.1, 1
|
|
%iv.2.next = add nuw nsw i32 %iv.2, 1
|
|
; The back-edge test compares the pre-increment %iv.2 against %iv.2.end.
%latch.cond = icmp ult i32 %iv.2, %iv.2.end
|
|
br i1 %latch.cond, label %loop, label %exit
|
|
|
|
; Guard-failure path: returns whatever the deoptimize intrinsic call yields.
deopt:
|
|
%deopt_res = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
|
|
ret i32 %deopt_res
|
|
|
|
; Result is 0 when the loop is skipped, otherwise the last %iv.2 seen in the loop.
exit:
|
|
%res = phi i32 [ 0, %entry ], [ %iv.2, %latch ]
|
|
ret i32 %res
|
|
}
|
|
|
|
; Intrinsic declarations. @llvm.assume is not called by the input IR of @test0;
; it appears only in the expected output, where the pass inserts the call.
; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn
|
|
declare void @llvm.assume(i1) #0
|
|
|
|
; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn
|
|
declare i1 @llvm.experimental.widenable.condition() #1
|
|
|
|
; Variadic deoptimize intrinsic, called from the %deopt block of @test0 with an
; empty "deopt" operand bundle. It has no attribute group attached here.
declare i32 @llvm.experimental.deoptimize.i32(...)
|
|
|
|
; Attribute groups #0 and #1 differ only in that #1 adds speculatable.
attributes #0 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
|
|
attributes #1 = { inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn }
|
|
|
|
; Range metadata attached (as !range !0) to the three bound loads in @test0.
; Per the LLVM LangRef the operand pair is a half-open interval, so the loaded
; values lie in [0, 2147483646).
!0 = !{i32 0, i32 2147483646}
|