This patch canonicalizes getelementptr instructions with constant indices to use the `i8` source element type. This makes it easier for optimizations to recognize that two GEPs are identical, because they don't need to see past many different ways to express the same offset. This is a first step towards https://discourse.llvm.org/t/rfc-replacing-getelementptr-with-ptradd/68699. This is limited to constant GEPs only for now, as they have a clear canonical form, while we're not yet sure how exactly to deal with variable indices. The test llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll gives two representative examples of the kind of optimization improvement we expect from this change. In the first test SimplifyCFG can now realize that all switch branches are actually the same. In the second test it can convert it into simple arithmetic. These are representative of common optimization failures we see in Rust. Fixes https://github.com/llvm/llvm-project/issues/69841.
290 lines
8.5 KiB
LLVM
290 lines
8.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -passes=instcombine -S %s | FileCheck %s
|
|
target datalayout = "p:32:32"
|
|
|
|
|
|
; Baseline case: an argument pointer is compared for equality against an
; alloca whose only use is that compare. The expected output is a constant
; false return with the alloca removed.
define i1 @alloca_argument_compare(ptr %arg) {
|
|
; CHECK-LABEL: @alloca_argument_compare(
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%alloc = alloca i64
|
|
%cmp = icmp eq ptr %arg, %alloc
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; Same as @alloca_argument_compare but with the alloca as the first icmp
; operand; the expected output is still a constant false return.
define i1 @alloca_argument_compare_swapped(ptr %arg) {
|
|
; CHECK-LABEL: @alloca_argument_compare_swapped(
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%alloc = alloca i64
|
|
%cmp = icmp eq ptr %alloc, %arg
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; Inequality variant: the argument is compared with `ne` against an otherwise
; unused alloca, so the expected output is a constant true return.
define i1 @alloca_argument_compare_ne(ptr %arg) {
|
|
; CHECK-LABEL: @alloca_argument_compare_ne(
|
|
; CHECK-NEXT: ret i1 true
|
|
;
|
|
%alloc = alloca i64
|
|
%cmp = icmp ne ptr %arg, %alloc
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; Both compare operands are derived pointers: %p is a GEP off the argument
; with a variable index, %q is a GEP off the alloca with a constant index.
; The expected output is a constant false return.
define i1 @alloca_argument_compare_derived_ptrs(ptr %arg, i64 %x) {
|
|
; CHECK-LABEL: @alloca_argument_compare_derived_ptrs(
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%alloc = alloca i64, i64 8
|
|
%p = getelementptr i64, ptr %arg, i64 %x
|
|
%q = getelementptr i64, ptr %alloc, i64 3
|
|
%cmp = icmp eq ptr %p, %q
|
|
ret i1 %cmp
|
|
}
|
|
|
|
declare void @escape(ptr)
|
|
; Negative case: the alloca is passed to @escape (declared without a
; nocapture attribute) before it is compared. The expected output keeps the
; alloca, the call and the icmp, i.e. the compare is not folded.
define i1 @alloca_argument_compare_escaped_alloca(ptr %arg) {
|
|
; CHECK-LABEL: @alloca_argument_compare_escaped_alloca(
|
|
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64, align 8
|
|
; CHECK-NEXT: call void @escape(ptr nonnull [[ALLOC]])
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[ALLOC]], [[ARG:%.*]]
|
|
; CHECK-NEXT: ret i1 [[CMP]]
|
|
;
|
|
%alloc = alloca i64
|
|
call void @escape(ptr %alloc)
|
|
%cmp = icmp eq ptr %alloc, %arg
|
|
ret i1 %cmp
|
|
}
|
|
|
|
declare void @check_compares(i1, i1)
|
|
; Two equality compares involve the same alloca: %cmp1 compares the argument
; with the alloca directly, %cmp2 compares a GEP off the argument with a GEP
; off the alloca. The expected output passes constant false for both.
define void @alloca_argument_compare_two_compares(ptr %p) {
|
|
; CHECK-LABEL: @alloca_argument_compare_two_compares(
|
|
; CHECK-NEXT: call void @check_compares(i1 false, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%q = alloca i64, i64 8
|
|
%r = getelementptr i64, ptr %p, i64 1
|
|
%s = getelementptr i64, ptr %q, i64 2
|
|
%cmp1 = icmp eq ptr %p, %q
|
|
%cmp2 = icmp eq ptr %r, %s
|
|
call void @check_compares(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
; Both compares are folded here (see the expected output above); the alloca
; is only used by the GEP and the two compares.
|
|
}
|
|
|
|
; Negative case: a pointer derived from the alloca is stored to memory after
; the compare. The expected output keeps the icmp unfolded. The GEP is
; expected in i8 form with a 32-bit index of 8 (one i64 element; pointers are
; 32 bits wide per the file's datalayout).
define i1 @alloca_argument_compare_escaped_through_store(ptr %arg, ptr %ptr) {
|
|
; CHECK-LABEL: @alloca_argument_compare_escaped_through_store(
|
|
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64, align 8
|
|
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[ALLOC]], [[ARG:%.*]]
|
|
; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i32 8
|
|
; CHECK-NEXT: store ptr [[P]], ptr [[PTR:%.*]], align 4
|
|
; CHECK-NEXT: ret i1 [[CMP]]
|
|
;
|
|
%alloc = alloca i64
|
|
%cmp = icmp eq ptr %alloc, %arg
|
|
%p = getelementptr i64, ptr %alloc, i64 1
|
|
store ptr %p, ptr %ptr
|
|
ret i1 %cmp
|
|
}
|
|
|
|
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
|
|
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
|
|
; Besides the compare, the alloca is used only by lifetime markers and as the
; destination of a store. The expected output is just a constant false
; return; the alloca and all of its other users are gone.
define i1 @alloca_argument_compare_benign_instrs(ptr %arg) {
|
|
; CHECK-LABEL: @alloca_argument_compare_benign_instrs(
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%alloc = alloca i8
|
|
call void @llvm.lifetime.start.p0(i64 1, ptr %alloc)
|
|
%cmp = icmp eq ptr %arg, %alloc
|
|
%x = load i8, ptr %arg
|
|
store i8 %x, ptr %alloc
|
|
call void @llvm.lifetime.end.p0(i64 1, ptr %alloc)
|
|
ret i1 %cmp
|
|
}
|
|
|
|
declare ptr @allocator()
|
|
; The alloca is compared against the pointer returned by a call to
; @allocator. The expected output keeps the call but returns constant false.
define i1 @alloca_call_compare() {
|
|
; CHECK-LABEL: @alloca_call_compare(
|
|
; CHECK-NEXT: [[Q:%.*]] = call ptr @allocator()
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%p = alloca i64
|
|
%q = call ptr @allocator()
|
|
%cmp = icmp eq ptr %p, %q
|
|
ret i1 %cmp
|
|
}
|
|
|
|
|
|
; The next block of tests demonstrates a very subtle correctness requirement.
|
|
; We can generally assume any *single* stack layout we choose for the result of
|
|
; an alloca, but we can't simultaneously assume two different ones. As a
|
|
; result, we must make sure that we only fold conditions if we can ensure that
|
|
; we fold *all* potentially address capturing compares the same.
|
|
|
|
; These two functions represent either a) forging a pointer via inttoptr or
|
|
; b) indexing off an adjacent allocation. In either case, the operation is
|
|
; obscured by an uninlined helper and not visible to instcombine.
|
|
declare ptr @hidden_inttoptr()
|
|
declare ptr @hidden_offset(ptr %other)
|
|
|
|
; A single compare of the alloca against a pointer forged from the integer
; constant 2048 via inttoptr. The expected output is a constant false return.
define i1 @ptrtoint_single_cmp() {
|
|
; CHECK-LABEL: @ptrtoint_single_cmp(
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%m = alloca i8, i32 4
|
|
%rhs = inttoptr i64 2048 to ptr
|
|
%cmp = icmp eq ptr %m, %rhs
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; A single compare of alloca %m against a pointer 4 bytes past the start of a
; second 4-byte alloca %n. The expected output is a constant false return.
define i1 @offset_single_cmp() {
|
|
; CHECK-LABEL: @offset_single_cmp(
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%m = alloca i8, i32 4
|
|
%n = alloca i8, i32 4
|
|
%rhs = getelementptr i8, ptr %n, i32 4
|
|
%cmp = icmp eq ptr %m, %rhs
|
|
ret i1 %cmp
|
|
}
|
|
|
|
declare void @witness(i1, i1)
|
|
|
|
; The same alloca is compared twice: once against a visible inttoptr constant
; and once against the result of the opaque @hidden_inttoptr call. The
; expected output keeps the call and passes constant false for both compares.
define void @consistent_fold1() {
|
|
; CHECK-LABEL: @consistent_fold1(
|
|
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_inttoptr()
|
|
; CHECK-NEXT: call void @witness(i1 false, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%m = alloca i8, i32 4
|
|
%rhs = inttoptr i64 2048 to ptr
|
|
%rhs2 = call ptr @hidden_inttoptr()
|
|
%cmp1 = icmp eq ptr %m, %rhs
|
|
%cmp2 = icmp eq ptr %m, %rhs2
|
|
call void @witness(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
}
|
|
|
|
; Alloca %m is compared against a visible GEP off a second alloca %n and
; against the result of @hidden_offset(%n). The expected output keeps %n
; (it is still passed to the call) and passes constant false for both
; compares.
define void @consistent_fold2() {
|
|
; CHECK-LABEL: @consistent_fold2(
|
|
; CHECK-NEXT: [[N2:%.*]] = alloca [4 x i8], align 1
|
|
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_offset(ptr nonnull [[N2]])
|
|
; CHECK-NEXT: call void @witness(i1 false, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%m = alloca i8, i32 4
|
|
%n = alloca i8, i32 4
|
|
%rhs = getelementptr i8, ptr %n, i32 4
|
|
%rhs2 = call ptr @hidden_offset(ptr %n)
|
|
%cmp1 = icmp eq ptr %m, %rhs
|
|
%cmp2 = icmp eq ptr %m, %rhs2
|
|
call void @witness(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
}
|
|
|
|
; The alloca is compared against a pointer loaded from the global @gp and
; against the result of @hidden_inttoptr. The expected output drops the load,
; keeps the call, and passes constant false for both compares.
define void @consistent_fold3() {
|
|
; CHECK-LABEL: @consistent_fold3(
|
|
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_inttoptr()
|
|
; CHECK-NEXT: call void @witness(i1 false, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%m = alloca i8, i32 4
|
|
%lgp = load ptr, ptr @gp, align 8
|
|
%rhs2 = call ptr @hidden_inttoptr()
|
|
%cmp1 = icmp eq ptr %m, %lgp
|
|
%cmp2 = icmp eq ptr %m, %rhs2
|
|
call void @witness(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
}
|
|
|
|
; The alloca is compared twice against the same pointer loaded from @gp. The
; expected output passes constant false for both compares.
; NOTE(review): the "neg_" prefix suggests a negative test, but the expected
; output shows the compares being folded; confirm whether the name is stale.
define void @neg_consistent_fold4() {
|
|
; CHECK-LABEL: @neg_consistent_fold4(
|
|
; CHECK-NEXT: call void @witness(i1 false, i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%m = alloca i8, i32 4
|
|
%lgp = load ptr, ptr @gp, align 8
|
|
%cmp1 = icmp eq ptr %m, %lgp
|
|
%cmp2 = icmp eq ptr %m, %lgp
|
|
call void @witness(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
}
|
|
|
|
; A nocapture call can't cause a consistent result issue as it is (by
|
|
; assumption) not able to contain a comparison which might capture the
|
|
; address.
|
|
|
|
declare void @unknown(ptr)
|
|
|
|
; The alloca is first passed to @unknown with a nocapture attribute on the
; call-site argument, then compared against an inttoptr constant. The
; expected output keeps the alloca and the call and returns constant false.
define i1 @consistent_nocapture_inttoptr() {
|
|
; CHECK-LABEL: @consistent_nocapture_inttoptr(
|
|
; CHECK-NEXT: [[M1:%.*]] = alloca [4 x i8], align 1
|
|
; CHECK-NEXT: call void @unknown(ptr nocapture nonnull [[M1]])
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%m = alloca i8, i32 4
|
|
call void @unknown(ptr nocapture %m)
|
|
%rhs = inttoptr i64 2048 to ptr
|
|
%cmp = icmp eq ptr %m, %rhs
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; Like @consistent_nocapture_inttoptr, but the other compare operand is a GEP
; 4 bytes past the start of a second alloca %n. The expected output keeps
; only the first alloca and the nocapture call and returns constant false.
define i1 @consistent_nocapture_offset() {
|
|
; CHECK-LABEL: @consistent_nocapture_offset(
|
|
; CHECK-NEXT: [[M1:%.*]] = alloca [4 x i8], align 1
|
|
; CHECK-NEXT: call void @unknown(ptr nocapture nonnull [[M1]])
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%m = alloca i8, i32 4
|
|
call void @unknown(ptr nocapture %m)
|
|
%n = alloca i8, i32 4
|
|
%rhs = getelementptr i8, ptr %n, i32 4
|
|
%cmp = icmp eq ptr %m, %rhs
|
|
ret i1 %cmp
|
|
}
|
|
|
|
@gp = global ptr null, align 8
|
|
|
|
; The alloca is passed to @unknown with a nocapture call-site attribute, then
; compared against a pointer loaded from @gp (the load carries !nonnull
; metadata). The expected output drops the load and returns constant false.
define i1 @consistent_nocapture_through_global() {
|
|
; CHECK-LABEL: @consistent_nocapture_through_global(
|
|
; CHECK-NEXT: [[M1:%.*]] = alloca [4 x i8], align 1
|
|
; CHECK-NEXT: call void @unknown(ptr nocapture nonnull [[M1]])
|
|
; CHECK-NEXT: ret i1 false
|
|
;
|
|
%m = alloca i8, i32 4
|
|
call void @unknown(ptr nocapture %m)
|
|
%lgp = load ptr, ptr @gp, align 8, !nonnull !{}
|
|
%cmp = icmp eq ptr %m, %lgp
|
|
ret i1 %cmp
|
|
}
|
|
|
|
; Negative case: %cmp1 compares the alloca with %p directly, while %cmp2
; compares %p with a select between the alloca and an unrelated pointer %p2.
; The expected output keeps both compares and the select unchanged.
define void @select_alloca_unrelated_ptr(i1 %c, ptr %p, ptr %p2) {
|
|
; CHECK-LABEL: @select_alloca_unrelated_ptr(
|
|
; CHECK-NEXT: [[M:%.*]] = alloca i8, align 1
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq ptr [[M]], [[P:%.*]]
|
|
; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], ptr [[M]], ptr [[P2:%.*]]
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[S]], [[P]]
|
|
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%m = alloca i8
|
|
%cmp1 = icmp eq ptr %m, %p
|
|
%s = select i1 %c, ptr %m, ptr %p2
|
|
%cmp2 = icmp eq ptr %s, %p
|
|
call void @witness(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
}
|
|
|
|
; %cmp1 compares the alloca with an argument pointer; %cmp2 compares the
; alloca with a GEP off itself by a variable byte offset. In the expected
; output %cmp1 becomes constant false and %cmp2 becomes a test of whether
; %offset is zero.
define void @alloca_offset_icmp(ptr %p, i32 %offset) {
|
|
; CHECK-LABEL: @alloca_offset_icmp(
|
|
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[OFFSET:%.*]], 0
|
|
; CHECK-NEXT: call void @witness(i1 false, i1 [[CMP2]])
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%m = alloca [4 x i8]
|
|
%g = getelementptr i8, ptr %m, i32 %offset
|
|
%cmp1 = icmp eq ptr %m, %p
|
|
%cmp2 = icmp eq ptr %m, %g
|
|
call void @witness(i1 %cmp1, i1 %cmp2)
|
|
ret void
|
|
}
|