clang-p2996/llvm/test/Transforms/InstCombine/compare-alloca.ll
Nikita Popov 90ba33099c [InstCombine] Canonicalize constant GEPs to i8 source element type (#68882)
This patch canonicalizes getelementptr instructions with constant
indices to use the `i8` source element type. This makes it easier for
optimizations to recognize that two GEPs are identical, because they
don't need to see past many different ways to express the same offset.

This is a first step towards
https://discourse.llvm.org/t/rfc-replacing-getelementptr-with-ptradd/68699.
This is limited to constant GEPs only for now, as they have a clear
canonical form, while we're not yet sure how exactly to deal with
variable indices.

The test llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll gives
two representative examples of the kind of optimization improvement we
expect from this change. In the first test SimplifyCFG can now realize
that all switch branches are actually the same. In the second test it
can convert the switch into simple arithmetic. These are representative of
common optimization failures we see in Rust.

Fixes https://github.com/llvm/llvm-project/issues/69841.
2024-01-24 15:25:29 +01:00
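
For illustration (not part of the commit message), a minimal sketch of the canonicalization, assuming the usual layout where i64 occupies 8 bytes: a constant-index GEP such as

    %p = getelementptr inbounds i64, ptr %base, i64 3

is rewritten by InstCombine into an equivalent i8 GEP with the byte offset folded in:

    %p = getelementptr inbounds i8, ptr %base, i64 24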

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S %s | FileCheck %s
target datalayout = "p:32:32"
define i1 @alloca_argument_compare(ptr %arg) {
; CHECK-LABEL: @alloca_argument_compare(
; CHECK-NEXT: ret i1 false
;
%alloc = alloca i64
%cmp = icmp eq ptr %arg, %alloc
ret i1 %cmp
}
define i1 @alloca_argument_compare_swapped(ptr %arg) {
; CHECK-LABEL: @alloca_argument_compare_swapped(
; CHECK-NEXT: ret i1 false
;
%alloc = alloca i64
%cmp = icmp eq ptr %alloc, %arg
ret i1 %cmp
}
define i1 @alloca_argument_compare_ne(ptr %arg) {
; CHECK-LABEL: @alloca_argument_compare_ne(
; CHECK-NEXT: ret i1 true
;
%alloc = alloca i64
%cmp = icmp ne ptr %arg, %alloc
ret i1 %cmp
}
define i1 @alloca_argument_compare_derived_ptrs(ptr %arg, i64 %x) {
; CHECK-LABEL: @alloca_argument_compare_derived_ptrs(
; CHECK-NEXT: ret i1 false
;
%alloc = alloca i64, i64 8
%p = getelementptr i64, ptr %arg, i64 %x
%q = getelementptr i64, ptr %alloc, i64 3
%cmp = icmp eq ptr %p, %q
ret i1 %cmp
}
declare void @escape(ptr)
define i1 @alloca_argument_compare_escaped_alloca(ptr %arg) {
; CHECK-LABEL: @alloca_argument_compare_escaped_alloca(
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT: call void @escape(ptr nonnull [[ALLOC]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[ALLOC]], [[ARG:%.*]]
; CHECK-NEXT: ret i1 [[CMP]]
;
%alloc = alloca i64
call void @escape(ptr %alloc)
%cmp = icmp eq ptr %alloc, %arg
ret i1 %cmp
}
declare void @check_compares(i1, i1)
define void @alloca_argument_compare_two_compares(ptr %p) {
; CHECK-LABEL: @alloca_argument_compare_two_compares(
; CHECK-NEXT: call void @check_compares(i1 false, i1 false)
; CHECK-NEXT: ret void
;
%q = alloca i64, i64 8
%r = getelementptr i64, ptr %p, i64 1
%s = getelementptr i64, ptr %q, i64 2
%cmp1 = icmp eq ptr %p, %q
%cmp2 = icmp eq ptr %r, %s
call void @check_compares(i1 %cmp1, i1 %cmp2)
ret void
; Both compares fold to false, since the alloca does not escape.
}
define i1 @alloca_argument_compare_escaped_through_store(ptr %arg, ptr %ptr) {
; CHECK-LABEL: @alloca_argument_compare_escaped_through_store(
; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64, align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[ALLOC]], [[ARG:%.*]]
; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i32 8
; CHECK-NEXT: store ptr [[P]], ptr [[PTR:%.*]], align 4
; CHECK-NEXT: ret i1 [[CMP]]
;
%alloc = alloca i64
%cmp = icmp eq ptr %alloc, %arg
%p = getelementptr i64, ptr %alloc, i64 1
store ptr %p, ptr %ptr
ret i1 %cmp
}
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
define i1 @alloca_argument_compare_benign_instrs(ptr %arg) {
; CHECK-LABEL: @alloca_argument_compare_benign_instrs(
; CHECK-NEXT: ret i1 false
;
%alloc = alloca i8
call void @llvm.lifetime.start.p0(i64 1, ptr %alloc)
%cmp = icmp eq ptr %arg, %alloc
%x = load i8, ptr %arg
store i8 %x, ptr %alloc
call void @llvm.lifetime.end.p0(i64 1, ptr %alloc)
ret i1 %cmp
}
declare ptr @allocator()
define i1 @alloca_call_compare() {
; CHECK-LABEL: @alloca_call_compare(
; CHECK-NEXT: [[Q:%.*]] = call ptr @allocator()
; CHECK-NEXT: ret i1 false
;
%p = alloca i64
%q = call ptr @allocator()
%cmp = icmp eq ptr %p, %q
ret i1 %cmp
}
; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* stack layout we choose for the result of
; an alloca, but we can't simultaneously assume two different ones. As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address-capturing compares the same way.
; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation. In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
declare ptr @hidden_inttoptr()
declare ptr @hidden_offset(ptr %other)
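; A purely illustrative sketch (not part of the original test) of what the
; uninlined helpers above could look like in a separate module; the bodies are
; assumptions, chosen to show how a forged or adjacent-object pointer could
; compare equal to the alloca without instcombine being able to see that:
;   define ptr @hidden_inttoptr() {
;     %p = inttoptr i64 2048 to ptr              ; forge a pointer from an integer
;     ret ptr %p
;   }
;   define ptr @hidden_offset(ptr %other) {
;     %p = getelementptr i8, ptr %other, i32 4   ; one-past-the-end of %other
;     ret ptr %p
;   }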
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT: ret i1 false
;
%m = alloca i8, i32 4
%rhs = inttoptr i64 2048 to ptr
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT: ret i1 false
;
%m = alloca i8, i32 4
%n = alloca i8, i32 4
%rhs = getelementptr i8, ptr %n, i32 4
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
declare void @witness(i1, i1)
define void @consistent_fold1() {
; CHECK-LABEL: @consistent_fold1(
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
%m = alloca i8, i32 4
%rhs = inttoptr i64 2048 to ptr
%rhs2 = call ptr @hidden_inttoptr()
%cmp1 = icmp eq ptr %m, %rhs
%cmp2 = icmp eq ptr %m, %rhs2
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
define void @consistent_fold2() {
; CHECK-LABEL: @consistent_fold2(
; CHECK-NEXT: [[N2:%.*]] = alloca [4 x i8], align 1
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_offset(ptr nonnull [[N2]])
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
%m = alloca i8, i32 4
%n = alloca i8, i32 4
%rhs = getelementptr i8, ptr %n, i32 4
%rhs2 = call ptr @hidden_offset(ptr %n)
%cmp1 = icmp eq ptr %m, %rhs
%cmp2 = icmp eq ptr %m, %rhs2
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
define void @consistent_fold3() {
; CHECK-LABEL: @consistent_fold3(
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
%m = alloca i8, i32 4
%lgp = load ptr, ptr @gp, align 8
%rhs2 = call ptr @hidden_inttoptr()
%cmp1 = icmp eq ptr %m, %lgp
%cmp2 = icmp eq ptr %m, %rhs2
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
%m = alloca i8, i32 4
%lgp = load ptr, ptr @gp, align 8
%cmp1 = icmp eq ptr %m, %lgp
%cmp2 = icmp eq ptr %m, %lgp
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
; A nocapture call can't cause a consistency issue for these folds: by
; assumption, the callee is not able to contain a comparison which might
; capture the address.
declare void @unknown(ptr)
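; For illustration only (an assumption, not part of the original test), a callee
; compatible with the nocapture guarantee may read or write through the pointer
; but must not let the address escape or observe it via a comparison:
;   define void @unknown_body(ptr nocapture %p) {
;     store i8 0, ptr %p                         ; uses the memory, not the address
;     ret void
;   }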
define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT: [[M1:%.*]] = alloca [4 x i8], align 1
; CHECK-NEXT: call void @unknown(ptr nocapture nonnull [[M1]])
; CHECK-NEXT: ret i1 false
;
%m = alloca i8, i32 4
call void @unknown(ptr nocapture %m)
%rhs = inttoptr i64 2048 to ptr
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT: [[M1:%.*]] = alloca [4 x i8], align 1
; CHECK-NEXT: call void @unknown(ptr nocapture nonnull [[M1]])
; CHECK-NEXT: ret i1 false
;
%m = alloca i8, i32 4
call void @unknown(ptr nocapture %m)
%n = alloca i8, i32 4
%rhs = getelementptr i8, ptr %n, i32 4
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
@gp = global ptr null, align 8
define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT: [[M1:%.*]] = alloca [4 x i8], align 1
; CHECK-NEXT: call void @unknown(ptr nocapture nonnull [[M1]])
; CHECK-NEXT: ret i1 false
;
%m = alloca i8, i32 4
call void @unknown(ptr nocapture %m)
%lgp = load ptr, ptr @gp, align 8, !nonnull !{}
%cmp = icmp eq ptr %m, %lgp
ret i1 %cmp
}
define void @select_alloca_unrelated_ptr(i1 %c, ptr %p, ptr %p2) {
; CHECK-LABEL: @select_alloca_unrelated_ptr(
; CHECK-NEXT: [[M:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq ptr [[M]], [[P:%.*]]
; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], ptr [[M]], ptr [[P2:%.*]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[S]], [[P]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
%m = alloca i8
%cmp1 = icmp eq ptr %m, %p
%s = select i1 %c, ptr %m, ptr %p2
%cmp2 = icmp eq ptr %s, %p
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
define void @alloca_offset_icmp(ptr %p, i32 %offset) {
; CHECK-LABEL: @alloca_offset_icmp(
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[OFFSET:%.*]], 0
; CHECK-NEXT: call void @witness(i1 false, i1 [[CMP2]])
; CHECK-NEXT: ret void
;
%m = alloca [4 x i8]
%g = getelementptr i8, ptr %m, i32 %offset
%cmp1 = icmp eq ptr %m, %p
%cmp2 = icmp eq ptr %m, %g
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}