Files
clang-p2996/llvm/test/Transforms/InstCombine/atomic.ll
Roman Lebedev e00f189d39 [InstCombine] Revert rL226781 "Teach InstCombine to canonicalize loads which are only ever stored to always use a legal integer type if one is available." (PR47592)
(it was introduced in https://lists.llvm.org/pipermail/llvm-dev/2015-January/080956.html)

This canonicalization seems dubious.

Most importantly, while it does not create `inttoptr` casts by itself,
it may cause them to appear later, see e.g. D88788.

I think it's pretty obvious that this is an undesirable outcome:
by now we've established that seemingly no-op `inttoptr`/`ptrtoint` casts
are not no-ops, and we are no longer eager to look past them.
Which e.g. means that given
```
%a = load i32
%b = inttoptr %a
%c = inttoptr %a
```
we likely won't be able to tell that `%b` and `%c` are the same thing.

As we can see in D88789 / D88788 / D88806 / D75505,
we can't really teach SCEV about this (not without the https://bugs.llvm.org/show_bug.cgi?id=47592 at least)
And we can't recover the situation post-inlining in instcombine.

So it really does look like this fold is actively breaking
otherwise-good IR, in a way that is not recoverable.
And that means this fold is not helpful, since it exposes passes
to patterns that they are otherwise unaware of.

Thus, I propose to simply not perform such a canonicalization.
The original motivational RFC does not state what larger problem
that canonicalization was trying to solve, so I'm not sure
how this plays out in the larger picture.

On vanilla llvm test-suite + RawSpeed, this results in an
increase in asm instruction count and final object size of ~0.05%, and
decreases the final count of bitcasts by 4.79% (-28990),
of ptrtoint casts by 15.41% (-3423),
and of inttoptr casts by 25.59% (-6919, *sic*).
Overall, there are 0.04% fewer IR blocks and 0.39% fewer instructions.

See https://bugs.llvm.org/show_bug.cgi?id=47592

Differential Revision: https://reviews.llvm.org/D88789
2020-10-06 00:00:30 +03:00

425 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -instcombine | FileCheck %s
; Data layout used by every test in this file: little-endian ("e"), 64-bit
; pointers ("p:64:64:64") and native integer widths of 8/16/32/64 bits
; ("n8:16:32:64").
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
; Check transforms involving atomic operations
; A seq_cst atomic load followed by a plain load of the same pointer.
; Expected output: only the atomic load remains and the sum of the two loads
; becomes a left shift of the atomic load's value by 1.
define i32 @test1(i32* %p) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Two volatile loads of the same pointer.
; Expected output: both volatile loads and the add are kept unchanged.
define i32 @test2(i32* %p) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
; CHECK-NEXT: [[Y:%.*]] = load volatile i32, i32* [[P]], align 4
; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load volatile i32, i32* %p, align 4
%y = load volatile i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; The exact semantics of mixing volatile and non-volatile on the same
; memory location are a bit unclear, but conservatively, we know we don't
; want to remove the volatile.
; Expected output: the volatile load is kept, the following plain load is
; replaced by the volatile load's value, and the add becomes a shift by 1.
define i32 @test3(i32* %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load volatile i32, i32* %p, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Forwarding from a stronger ordered atomic is fine: here a seq_cst load
; feeds a later unordered load of the same pointer.
; Expected output: only the seq_cst load remains and the add becomes a
; shift by 1.
define i32 @test4(i32* %p) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load atomic i32, i32* %p unordered, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Forwarding from an unordered atomic load to a later non-atomic load of the
; same pointer is fine, and is what this test exercises.
; Forwarding from a non-atomic load to a later atomic one is not. (The
; earlier load could in principle be promoted to atomic and then forwarded,
; but we can't just drop the atomic from the load.)
; Expected output: only the unordered atomic load remains and the add
; becomes a shift by 1.
define i32 @test5(i32* %p) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p unordered, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Forwarding atomic to atomic is fine: both loads here are unordered.
; Expected output: a single unordered atomic load and a shift by 1.
define i32 @test6(i32* %p) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p unordered, align 4
%y = load atomic i32, i32* %p unordered, align 4
%z = add i32 %x, %y
ret i32 %z
}
; FIXME: we currently don't do anything for monotonic
; A seq_cst load followed by a monotonic load of the same pointer.
; Expected output: both loads and the add are kept unchanged.
define i32 @test7(i32* %p) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Y:%.*]] = load atomic i32, i32* [[P]] monotonic, align 4
; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load atomic i32, i32* %p monotonic, align 4
%z = add i32 %x, %y
ret i32 %z
}
; FIXME: We could forward in racy code
; A seq_cst load followed by an acquire load of the same pointer.
; Expected output: both loads and the add are kept unchanged.
define i32 @test8(i32* %p) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Y:%.*]] = load atomic i32, i32* [[P]] acquire, align 4
; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load atomic i32, i32* %p acquire, align 4
%z = add i32 %x, %y
ret i32 %z
}
; An unordered access to null is still unreachable. There's no
; ordering imposed.
; Expected output: the unordered atomic load from null is replaced by a
; non-atomic store of undef to null, and the function returns undef.
define i32 @test9() {
; CHECK-LABEL: @test9(
; CHECK-NEXT: store i32 undef, i32* null, align 536870912
; CHECK-NEXT: ret i32 undef
;
%x = load atomic i32, i32* null unordered, align 4
ret i32 %x
}
; Same input as @test9, but the function carries attribute group #0.
; Expected output: the unordered atomic load from null is kept; only its
; alignment changes (4 becomes 536870912).
define i32 @test9_no_null_opt() #0 {
; CHECK-LABEL: @test9_no_null_opt(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null unordered, align 536870912
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null unordered, align 4
ret i32 %x
}
; FIXME: Could also fold
; A monotonic atomic load from null.
; Expected output: the load is kept; only its alignment changes
; (4 becomes 536870912).
define i32 @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null monotonic, align 536870912
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null monotonic, align 4
ret i32 %x
}
; Same input as @test10, but the function carries attribute group #0.
; Expected output: the monotonic atomic load from null is kept; only its
; alignment changes (4 becomes 536870912).
define i32 @test10_no_null_opt() #0 {
; CHECK-LABEL: @test10_no_null_opt(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null monotonic, align 536870912
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null monotonic, align 4
ret i32 %x
}
; Would this be legal to fold? Probably?
; A seq_cst atomic load from null.
; Expected output: the load is kept; only its alignment changes
; (4 becomes 536870912).
define i32 @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null seq_cst, align 536870912
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null seq_cst, align 4
ret i32 %x
}
; Same input as @test11, but the function carries attribute group #0.
; Expected output: the seq_cst atomic load from null is kept; only its
; alignment changes (4 becomes 536870912).
define i32 @test11_no_null_opt() #0 {
; CHECK-LABEL: @test11_no_null_opt(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null seq_cst, align 536870912
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null seq_cst, align 4
ret i32 %x
}
; An unordered access to null is still unreachable. There's no
; ordering imposed.
; Expected output: the unordered atomic store to null is kept, but the
; stored value 0 is replaced by undef and the alignment changes
; (4 becomes 536870912).
define i32 @test12() {
; CHECK-LABEL: @test12(
; CHECK-NEXT: store atomic i32 undef, i32* null unordered, align 536870912
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null unordered, align 4
ret i32 0
}
; Same input as @test12, but the function carries attribute group #0.
; Expected output: the unordered atomic store of 0 to null is kept; only its
; alignment changes (4 becomes 536870912).
define i32 @test12_no_null_opt() #0 {
; CHECK-LABEL: @test12_no_null_opt(
; CHECK-NEXT: store atomic i32 0, i32* null unordered, align 536870912
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null unordered, align 4
ret i32 0
}
; FIXME: Could also fold
; A monotonic atomic store of 0 to null.
; Expected output: the store is kept with its value; only its alignment
; changes (4 becomes 536870912).
define i32 @test13() {
; CHECK-LABEL: @test13(
; CHECK-NEXT: store atomic i32 0, i32* null monotonic, align 536870912
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null monotonic, align 4
ret i32 0
}
; Same input as @test13, but the function carries attribute group #0.
; Expected output: the monotonic atomic store of 0 to null is kept; only its
; alignment changes (4 becomes 536870912).
define i32 @test13_no_null_opt() #0 {
; CHECK-LABEL: @test13_no_null_opt(
; CHECK-NEXT: store atomic i32 0, i32* null monotonic, align 536870912
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null monotonic, align 4
ret i32 0
}
; Would this be legal to fold? Probably?
; A seq_cst atomic store of 0 to null.
; Expected output: the store is kept with its value; only its alignment
; changes (4 becomes 536870912).
define i32 @test14() {
; CHECK-LABEL: @test14(
; CHECK-NEXT: store atomic i32 0, i32* null seq_cst, align 536870912
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null seq_cst, align 4
ret i32 0
}
; Same input as @test14, but the function carries attribute group #0.
; Expected output: the seq_cst atomic store of 0 to null is kept; only its
; alignment changes (4 becomes 536870912).
define i32 @test14_no_null_opt() #0 {
; CHECK-LABEL: @test14_no_null_opt(
; CHECK-NEXT: store atomic i32 0, i32* null seq_cst, align 536870912
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null seq_cst, align 4
ret i32 0
}
; External i32 globals used as atomic load/store targets by tests in this file.
@a = external global i32
@b = external global i32
; An unordered atomic load through a select between the addresses of @a
; and @b.
; Expected output: one unordered atomic load from each global, followed by a
; select between the two loaded values.
define i32 @test15(i1 %cnd) {
; CHECK-LABEL: @test15(
; CHECK-NEXT: [[A_VAL:%.*]] = load atomic i32, i32* @a unordered, align 4
; CHECK-NEXT: [[B_VAL:%.*]] = load atomic i32, i32* @b unordered, align 4
; CHECK-NEXT: [[X:%.*]] = select i1 [[CND:%.*]], i32 [[A_VAL]], i32 [[B_VAL]]
; CHECK-NEXT: ret i32 [[X]]
;
%addr = select i1 %cnd, i32* @a, i32* @b
%x = load atomic i32, i32* %addr unordered, align 4
ret i32 %x
}
; FIXME: This would be legal to transform
; A monotonic atomic load through a select between the addresses of @a
; and @b.
; Expected output: the select of addresses and the single load are kept
; unchanged.
define i32 @test16(i1 %cnd) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[ADDR]] monotonic, align 4
; CHECK-NEXT: ret i32 [[X]]
;
%addr = select i1 %cnd, i32* @a, i32* @b
%x = load atomic i32, i32* %addr monotonic, align 4
ret i32 %x
}
; FIXME: This would be legal to transform
; A seq_cst atomic load through a select between the addresses of @a and @b.
; Expected output: the select of addresses and the single load are kept
; unchanged.
define i32 @test17(i1 %cnd) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[ADDR]] seq_cst, align 4
; CHECK-NEXT: ret i32 [[X]]
;
%addr = select i1 %cnd, i32* @a, i32* @b
%x = load atomic i32, i32* %addr seq_cst, align 4
ret i32 %x
}
; Both predecessors of the merge block end with an unordered atomic store
; to @a (of 1 and 2 respectively).
; Expected output: the two stores are replaced by a single unordered atomic
; store in the merge block whose value is a phi of 1 and 2.
define i32 @test22(i1 %cnd) {
; CHECK-LABEL: @test22(
; CHECK-NEXT: br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK: block1:
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: block2:
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 2, [[BLOCK2]] ], [ 1, [[BLOCK1]] ]
; CHECK-NEXT: store atomic i32 [[STOREMERGE]], i32* @a unordered, align 4
; CHECK-NEXT: ret i32 0
;
br i1 %cnd, label %block1, label %block2
block1:
store atomic i32 1, i32* @a unordered, align 4
br label %merge
block2:
store atomic i32 2, i32* @a unordered, align 4
br label %merge
merge:
ret i32 0
}
; TODO: probably also legal here
; Same shape as @test22, but with monotonic stores to @a.
; Expected output: each store stays in its own block; no phi or merged
; store is created.
define i32 @test23(i1 %cnd) {
; CHECK-LABEL: @test23(
; CHECK-NEXT: br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK: block1:
; CHECK-NEXT: store atomic i32 1, i32* @a monotonic, align 4
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: block2:
; CHECK-NEXT: store atomic i32 2, i32* @a monotonic, align 4
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: ret i32 0
;
br i1 %cnd, label %block1, label %block2
block1:
store atomic i32 1, i32* @a monotonic, align 4
br label %merge
block2:
store atomic i32 2, i32* @a monotonic, align 4
br label %merge
merge:
ret i32 0
}
; External function with no body or attributes. @test18 and @test19 call it
; between a load and a store to the same pointer.
declare void @clobber()
; An unordered atomic float load whose only use is an unordered atomic
; store back to the same pointer, with a call to @clobber in between.
; Expected output: the load, call and store are kept unchanged, and the
; accesses keep the float type.
define i32 @test18(float* %p) {
; CHECK-LABEL: @test18(
; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4
; CHECK-NEXT: call void @clobber()
; CHECK-NEXT: store atomic float [[X]], float* [[P]] unordered, align 4
; CHECK-NEXT: ret i32 0
;
%x = load atomic float, float* %p unordered, align 4
call void @clobber() ;; keep the load around
store atomic float %x, float* %p unordered, align 4
ret i32 0
}
; TODO: probably also legal in this case
; NOTE(review): the TODO above appears to refer to a transform applied to
; @test18, but @test18's expected output is now identical to its input;
; confirm whether the TODO is stale.
; Same shape as @test18, but with seq_cst ordering on the load and store.
; Expected output: the load, call and store are kept unchanged.
define i32 @test19(float* %p) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: call void @clobber()
; CHECK-NEXT: store atomic float [[X]], float* [[P]] seq_cst, align 4
; CHECK-NEXT: ret i32 0
;
%x = load atomic float, float* %p seq_cst, align 4
call void @clobber() ;; keep the load around
store atomic float %x, float* %p seq_cst, align 4
ret i32 0
}
; An unordered atomic store of a pointer value obtained by bitcasting
; i8* to i32*.
; Expected output: the value bitcast disappears; instead the destination
; pointer is bitcast from i32** to i8** and the original i8* is stored.
; NOTE(review): the store uses align 4 although the datalayout declares
; 64-bit pointers; confirm the under-alignment is intentional.
define i32 @test20(i32** %p, i8* %v) {
; CHECK-LABEL: @test20(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** [[P:%.*]] to i8**
; CHECK-NEXT: store atomic i8* [[V:%.*]], i8** [[TMP1]] unordered, align 4
; CHECK-NEXT: ret i32 0
;
%cast = bitcast i8* %v to i32*
store atomic i32* %cast, i32** %p unordered, align 4
ret i32 0
}
; Same input as @test20, but the store is monotonic.
; Expected output: the value bitcast and the store are kept unchanged.
; NOTE(review): the store uses align 4 although the datalayout declares
; 64-bit pointers; confirm the under-alignment is intentional.
define i32 @test21(i32** %p, i8* %v) {
; CHECK-LABEL: @test21(
; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[V:%.*]] to i32*
; CHECK-NEXT: store atomic i32* [[CAST]], i32** [[P:%.*]] monotonic, align 4
; CHECK-NEXT: ret i32 0
;
%cast = bitcast i8* %v to i32*
store atomic i32* %cast, i32** %p monotonic, align 4
ret i32 0
}
; Presumably a regression test for PR27490 (going by the name; confirm).
; A plain pointer load whose value is written with a volatile store.
; Expected output: both instructions keep their i8* type and the volatile
; flag; the only change is that align 8 is printed explicitly.
define void @pr27490a(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490a(
; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
; CHECK-NEXT: store volatile i8* [[L]], i8** [[P2:%.*]], align 8
; CHECK-NEXT: ret void
;
%l = load i8*, i8** %p1
store volatile i8* %l, i8** %p2
ret void
}
; Presumably a regression test for PR27490 (going by the name; confirm).
; A plain pointer load whose value is written with a seq_cst atomic store.
; Expected output: both instructions keep their i8* type and the store keeps
; its seq_cst ordering; the load gains an explicit align 8.
define void @pr27490b(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490b(
; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
; CHECK-NEXT: store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8
; CHECK-NEXT: ret void
;
%l = load i8*, i8** %p1
store atomic i8* %l, i8** %p2 seq_cst, align 8
ret void
}
;; At the moment, we can't form atomic vectors by folding since these are
;; not representable in the IR. This was pr29121. The right long term
;; solution is to extend the IR to handle this case.
;; Expected output: the atomic i64 load and the bitcast to <2 x float> are
;; kept as two separate instructions.
define <2 x float> @no_atomic_vector_load(i64* %p) {
; CHECK-LABEL: @no_atomic_vector_load(
; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, i64* [[P:%.*]] unordered, align 8
; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast i64 [[LOAD]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[DOTCAST]]
;
%load = load atomic i64, i64* %p unordered, align 8
%.cast = bitcast i64 %load to <2 x float>
ret <2 x float> %.cast
}
;; Store counterpart of @no_atomic_vector_load: a <2 x float> value is
;; bitcast to i64 and written with an unordered atomic i64 store.
;; Expected output: both bitcasts and the atomic i64 store are kept; no
;; atomic vector store is formed.
define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
; CHECK-LABEL: @no_atomic_vector_store(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[P:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[P2:%.*]] to i64*
; CHECK-NEXT: store atomic i64 [[TMP1]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT: ret void
;
%1 = bitcast <2 x float> %p to i64
%2 = bitcast i8* %p2 to i64*
store atomic i64 %1, i64* %2 unordered, align 8
ret void
}
; Attribute group applied to the *_no_null_opt functions in this file.
attributes #0 = { null_pointer_is_valid }