Currently, SROA is CFG-preserving. Not doing so does not affect any pipeline test. (???) Internally, SROA requires the Dominator Tree, and uses it solely for the final `-mem2reg` call. By design, we can't really SROA an alloca if its address escapes somehow, but we have logic to deal with a `load` of a `select`/`PHI`, where at least one of the possible addresses prevents promotion, by speculating the `load`s and `select`ing between the loaded values. As one would expect, that requires ensuring that the speculation is actually legal. Even ignoring complexity bailouts, that logic does not deal with everything, e.g. `isSafeToLoadUnconditionally()` does not recurse into the hands of a `select`. There can also be cases where the load is genuinely non-speculatable. So if we can't prove that the load can be speculated, we unfold the select, produce a two-entry phi node, and perform a predicated load. Now, that transformation must obviously update the Dominator Tree, since we require it later on. Doing so is trivial. Additionally, we don't want to do this for the final SROA invocation (D136806). In the end, this ends up having a negative (!) compile-time cost: https://llvm-compile-time-tracker.com/compare.php?from=c6d7e80ec4c17a415673b1cfd25924f98ac83608&to=ddf9600365093ea50d7e278696cbfa01641c959d&stat=instructions:u Though indeed, this only deals with `select`s; `PHI`s are still using speculation. Should we update some more analyses? Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D138238 This reverts commit 739611870d, and recommits 03e6d9d9d1 with a fixed assertion - we should check that DTU is there, not just assert false...
143 lines
5.2 KiB
LLVM
143 lines
5.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes='bdce,sroa<preserve-cfg>,bdce' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
|
|
; RUN: opt < %s -passes='bdce,sroa<modify-cfg>,bdce' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
|
|
|
|
; SROA fails to rewrite allocas but does rewrite some phis and delete
|
|
; dead instructions. Ensure that this invalidates analyses required
|
|
; for other passes.
|
|
|
|
target datalayout = "e-m:e-i64:64-n32:64"
|
|
target triple = "powerpc64le-grtev4-linux-gnu"
|
|
|
|
%class.b = type { i64 }
|
|
|
|
declare void @D(ptr sret(%class.b), ptr dereferenceable(32)) local_unnamed_addr
|
|
|
|
; Function Attrs: nounwind
|
|
; @H is the function under test. It has two stack slots:
;   * %3       - a %class.b whose address is passed to @D and to the lifetime
;                intrinsics; the expected output keeps this alloca.
;   * %.sroa.0 - an i64 slot that is stored to once and then read back as a
;                ptr through the phi %13, whose incoming values are all
;                %.sroa.0. The expected output contains neither this alloca,
;                its store, nor the pointer phi, and the load becomes
;                `inttoptr i64 0 to ptr`.
; The first two parameters are unnamed and take the implicit names %0 and %1;
; the entry block is therefore %2 and the first instruction is %3.
define void @H(ptr noalias nocapture readnone, [2 x i64], ptr %ptr, i32 signext %v, i64 %l, i64 %idx, ptr nonnull dereferenceable(32) %ptr2) {
|
|
; CHECK-LABEL: @H(
|
|
; CHECK-NEXT: [[TMP3:%.*]] = alloca [[CLASS_B:%.*]], align 8
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [2 x i64] [[TMP1:%.*]], 1
|
|
; CHECK-NEXT: switch i64 [[TMP4]], label [[TMP6:%.*]] [
|
|
; CHECK-NEXT: i64 4, label [[FOO:%.*]]
|
|
; CHECK-NEXT: i64 5, label [[TMP5:%.*]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: 5:
|
|
; CHECK-NEXT: br label [[TMP12:%.*]]
|
|
; CHECK: 6:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP4]], 5
|
|
; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP12]]
|
|
; CHECK: 8:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr inttoptr (i64 4 to ptr), align 4
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i8 [[TMP9]], 47
|
|
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 5, i64 4
|
|
; CHECK-NEXT: br label [[TMP12]]
|
|
; CHECK: 12:
|
|
; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ 4, [[TMP5]] ], [ [[TMP11]], [[TMP8]] ], [ 4, [[TMP6]] ]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP4]], 0
|
|
; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP4]], [[TMP13]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label [[TMP17:%.*]], label [[A_EXIT:%.*]]
|
|
; CHECK: 17:
|
|
; CHECK-NEXT: [[TMP18:%.*]] = tail call ptr @memchr(ptr [[PTR:%.*]], i32 signext [[V:%.*]], i64 [[L:%.*]])
|
|
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq ptr [[TMP18]], null
|
|
; CHECK-NEXT: [[TMP20:%.*]] = sext i1 [[TMP19]] to i64
|
|
; CHECK-NEXT: br label [[A_EXIT]]
|
|
; CHECK: a.exit:
|
|
; CHECK-NEXT: [[TMP21:%.*]] = phi i64 [ -1, [[TMP12]] ], [ [[TMP20]], [[TMP17]] ]
|
|
; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 0 to ptr
|
|
; CHECK-NEXT: [[TMP23:%.*]] = sub nsw i64 [[TMP21]], [[TMP13]]
|
|
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TMP3]])
|
|
; CHECK-NEXT: [[TMP24:%.*]] = icmp ult i64 [[TMP23]], 2
|
|
; CHECK-NEXT: br i1 [[TMP24]], label [[G_EXIT:%.*]], label [[TMP25:%.*]]
|
|
; CHECK: 25:
|
|
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 [[IDX:%.*]]
|
|
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq ptr [[TMP26]], null
|
|
; CHECK-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP29:%.*]]
|
|
; CHECK: 28:
|
|
; CHECK-NEXT: unreachable
|
|
; CHECK: 29:
|
|
; CHECK-NEXT: call void @D(ptr nonnull sret([[CLASS_B]]) [[TMP3]], ptr nonnull dereferenceable(32) [[PTR2:%.*]])
|
|
; CHECK-NEXT: br label [[G_EXIT]]
|
|
; CHECK: G.exit:
|
|
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[TMP3]])
|
|
; CHECK-NEXT: br label [[FOO]]
|
|
; CHECK: foo:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; Stack slot whose address is handed to @D and the lifetime intrinsics below.
%3 = alloca %class.b, align 8
|
|
; Second slot: written once here (i64 0) and read back later as a ptr via %13.
%.sroa.0 = alloca i64, align 8
|
|
store i64 0, ptr %.sroa.0, align 8
|
|
%4 = extractvalue [2 x i64] %1, 1
|
|
switch i64 %4, label %6 [
|
|
i64 4, label %foo
|
|
i64 5, label %5
|
|
]
|
|
|
|
; <label>:5:
|
|
br label %12
|
|
|
|
|
; <label>:6:
|
|
%7 = icmp ugt i64 %4, 5
|
|
br i1 %7, label %8, label %12
|
|
|
|
; <label>:8:
|
|
%9 = load i8, ptr inttoptr (i64 4 to ptr), align 4
|
|
%10 = icmp eq i8 %9, 47
|
|
%11 = select i1 %10, i64 5, i64 4
|
|
br label %12
|
|
|
|
; <label>:12:
|
|
; Pointer phi whose three incoming values are all %.sroa.0; the expected output
; for this block starts directly with the i64 phi, i.e. this phi is removed.
%13 = phi ptr [ %.sroa.0, %5 ], [ %.sroa.0, %8 ], [ %.sroa.0, %6 ]
|
|
%14 = phi i64 [ 4, %5 ], [ %11, %8 ], [ 4, %6 ]
|
|
%15 = icmp ne i64 %4, 0
|
|
%16 = icmp ugt i64 %4, %14
|
|
%17 = and i1 %15, %16
|
|
br i1 %17, label %18, label %a.exit
|
|
|
|
; <label>:18:
|
|
%19 = tail call ptr @memchr(ptr %ptr, i32 signext %v, i64 %l)
|
|
%20 = icmp eq ptr %19, null
|
|
%21 = sext i1 %20 to i64
|
|
br label %a.exit
|
|
|
|
a.exit:
|
|
%22 = phi i64 [ -1, %12 ], [ %21, %18 ]
|
|
; Reads the stored i64 0 back as a ptr through %13; the expected output has
; `inttoptr i64 0 to ptr` in this position instead of a load.
%23 = load ptr, ptr %13, align 8
|
|
%24 = sub nsw i64 %22, %14
|
|
call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3)
|
|
%25 = icmp ult i64 %24, 2
|
|
br i1 %25, label %G.exit, label %26
|
|
|
|
; <label>:26:
|
|
%27 = getelementptr inbounds i8, ptr %23, i64 %idx
|
|
%28 = icmp eq ptr %27, null
|
|
br i1 %28, label %29, label %30
|
|
|
|
; <label>:29:
|
|
unreachable
|
|
|
|
; <label>:30:
|
|
call void @D(ptr nonnull sret(%class.b) %3, ptr nonnull dereferenceable(32) %ptr2)
|
|
br label %G.exit
|
|
|
|
G.exit:
|
|
call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3)
|
|
br label %foo
|
|
|
|
foo:
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: nounwind readonly
|
|
declare ptr @memchr(ptr, i32 signext, i64) local_unnamed_addr
|
|
|
|
; Function Attrs: argmemonly nounwind
|
|
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
|
|
|
|
; Function Attrs: argmemonly nounwind
|
|
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; CHECK-MODIFY-CFG: {{.*}}
|
|
; CHECK-PRESERVE-CFG: {{.*}}
|