Files
clang-p2996/llvm/test/Transforms/InstCombine/atomic.ll
Arthur Eubanks 05392466f0 Reland [IR] Increase max alignment to 4GB
Currently the max alignment representable is 1GB, see D108661.
Setting the align of an object to 4GB is desirable in some cases to make sure the lower 32 bits are clear which can be used for some optimizations, e.g. https://crbug.com/1016945.

This uses an extra bit in instructions that carry an alignment. We can store 15 bits of "free" information, and with this change some instructions (e.g. AtomicCmpXchgInst) use 14 bits.
We can increase the max alignment representable above 4GB (up to 2^62) since we're only using 33 of the 64 values, but I've just limited it to 4GB for now.

The one place we have to update the bitcode format is for the alloca instruction. It stores its alignment into 5 bits of a 32 bit bitfield. I've added another field which is 8 bits and should be future proof for a while. For backward compatibility, we check if the old field has a value and use that, otherwise use the new field.

Updating clang's max allowed alignment will come in a future patch.

Reviewed By: hans

Differential Revision: https://reviews.llvm.org/D110451
2021-10-06 13:29:23 -07:00

425 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S < %s -instcombine | FileCheck %s
; Module-wide target settings: the layout string starts with "e"
; (little-endian) and declares 64-bit pointers (p:64:64:64); the triple
; selects x86_64 macOS.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
; Check transforms involving atomic operations
; A seq_cst atomic load followed by a plain load of the same pointer, summed.
; Expected output: only the seq_cst load remains and the sum becomes a
; shift-left by one of that single loaded value.
define i32 @test1(i32* %p) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Two volatile loads of the same pointer, summed.
; Expected output: both volatile loads and the add are kept unchanged.
define i32 @test2(i32* %p) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
; CHECK-NEXT: [[Y:%.*]] = load volatile i32, i32* [[P]], align 4
; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load volatile i32, i32* %p, align 4
%y = load volatile i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; The exact semantics of mixing volatile and non-volatile on the same
; memory location are a bit unclear, but conservatively, we know we don't
; want to remove the volatile.
; A volatile load followed by a plain load of the same pointer, summed.
; Expected output: the volatile load is kept, the plain load is gone, and
; the sum becomes a shift-left by one of the volatile load's value.
define i32 @test3(i32* %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* [[P:%.*]], align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load volatile i32, i32* %p, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Forwarding from a stronger ordered atomic is fine
; A seq_cst atomic load followed by an unordered atomic load of the same
; pointer. Expected output: only the seq_cst load remains and its value is
; used for both operands (add folded to a shift-left by one).
define i32 @test4(i32* %p) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load atomic i32, i32* %p unordered, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Forwarding from a non-atomic is not. (The earlier load
; could in principle be promoted to atomic and then forwarded,
; but we can't just drop the atomic from the load.)
; An unordered atomic load followed by a plain load of the same pointer.
; Expected output: only the unordered atomic load remains and the add is
; folded to a shift-left by one.
; NOTE(review): the comment preceding this function talks about forwarding
; from a non-atomic load, but here the first load is the atomic one and the
; later plain load is the one that disappears -- confirm which case was
; intended.
define i32 @test5(i32* %p) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p unordered, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
; Forwarding atomic to atomic is fine
; Two unordered atomic loads of the same pointer, summed.
; Expected output: a single unordered atomic load whose value is shifted
; left by one.
define i32 @test6(i32* %p) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X]], 1
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p unordered, align 4
%y = load atomic i32, i32* %p unordered, align 4
%z = add i32 %x, %y
ret i32 %z
}
; FIXME: we currently don't do anything for monotonic
; A seq_cst atomic load followed by a monotonic atomic load of the same
; pointer. Expected output: both loads and the add are kept unchanged.
define i32 @test7(i32* %p) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Y:%.*]] = load atomic i32, i32* [[P]] monotonic, align 4
; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load atomic i32, i32* %p monotonic, align 4
%z = add i32 %x, %y
ret i32 %z
}
; FIXME: We could forward in racy code
; A seq_cst atomic load followed by an acquire atomic load of the same
; pointer. Expected output: both loads and the add are kept unchanged.
define i32 @test8(i32* %p) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: [[Y:%.*]] = load atomic i32, i32* [[P]] acquire, align 4
; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]]
; CHECK-NEXT: ret i32 [[Z]]
;
%x = load atomic i32, i32* %p seq_cst, align 4
%y = load atomic i32, i32* %p acquire, align 4
%z = add i32 %x, %y
ret i32 %z
}
; An unordered access to null is still unreachable. There's no
; ordering imposed.
; Unordered atomic load from null, in a function without attribute group #0.
; Expected output: the load is replaced by a non-atomic store of poison to
; null (alignment 4294967296) and the function returns poison.
define i32 @test9() {
; CHECK-LABEL: @test9(
; CHECK-NEXT: store i32 poison, i32* null, align 4294967296
; CHECK-NEXT: ret i32 poison
;
%x = load atomic i32, i32* null unordered, align 4
ret i32 %x
}
; Same unordered atomic load from null as @test9, but the function carries
; attribute group #0 (null_pointer_is_valid). Expected output: the load is
; kept; only its alignment changes from 4 to 4294967296.
define i32 @test9_no_null_opt() #0 {
; CHECK-LABEL: @test9_no_null_opt(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null unordered, align 4294967296
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null unordered, align 4
ret i32 %x
}
; FIXME: Could also fold
; Monotonic atomic load from null, without attribute group #0.
; Expected output: the load is kept; only its alignment changes from 4 to
; 4294967296.
define i32 @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null monotonic, align 4294967296
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null monotonic, align 4
ret i32 %x
}
; Monotonic atomic load from null in a function with attribute group #0
; (null_pointer_is_valid). Expected output: the load is kept; only its
; alignment changes from 4 to 4294967296.
define i32 @test10_no_null_opt() #0 {
; CHECK-LABEL: @test10_no_null_opt(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null monotonic, align 4294967296
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null monotonic, align 4
ret i32 %x
}
; Would this be legal to fold? Probably?
; seq_cst atomic load from null, without attribute group #0.
; Expected output: the load is kept; only its alignment changes from 4 to
; 4294967296.
define i32 @test11() {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null seq_cst, align 4294967296
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null seq_cst, align 4
ret i32 %x
}
; seq_cst atomic load from null in a function with attribute group #0
; (null_pointer_is_valid). Expected output: the load is kept; only its
; alignment changes from 4 to 4294967296.
define i32 @test11_no_null_opt() #0 {
; CHECK-LABEL: @test11_no_null_opt(
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* null seq_cst, align 4294967296
; CHECK-NEXT: ret i32 [[X]]
;
%x = load atomic i32, i32* null seq_cst, align 4
ret i32 %x
}
; An unordered access to null is still unreachable. There's no
; ordering imposed.
; Unordered atomic store of 0 to null, without attribute group #0.
; Expected output: the store stays atomic/unordered but its stored value
; becomes poison and its alignment becomes 4294967296; the return of 0 is
; unchanged.
define i32 @test12() {
; CHECK-LABEL: @test12(
; CHECK-NEXT: store atomic i32 poison, i32* null unordered, align 4294967296
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null unordered, align 4
ret i32 0
}
; Same unordered atomic store to null as @test12, but with attribute group
; #0 (null_pointer_is_valid). Expected output: the store keeps its value 0;
; only the alignment changes from 4 to 4294967296.
define i32 @test12_no_null_opt() #0 {
; CHECK-LABEL: @test12_no_null_opt(
; CHECK-NEXT: store atomic i32 0, i32* null unordered, align 4294967296
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null unordered, align 4
ret i32 0
}
; FIXME: Could also fold
; Monotonic atomic store of 0 to null, without attribute group #0.
; Expected output: the store keeps its value 0; only the alignment changes
; from 4 to 4294967296.
define i32 @test13() {
; CHECK-LABEL: @test13(
; CHECK-NEXT: store atomic i32 0, i32* null monotonic, align 4294967296
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null monotonic, align 4
ret i32 0
}
; Monotonic atomic store of 0 to null in a function with attribute group #0
; (null_pointer_is_valid). Expected output: the store keeps its value 0;
; only the alignment changes from 4 to 4294967296.
define i32 @test13_no_null_opt() #0 {
; CHECK-LABEL: @test13_no_null_opt(
; CHECK-NEXT: store atomic i32 0, i32* null monotonic, align 4294967296
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null monotonic, align 4
ret i32 0
}
; Would this be legal to fold? Probably?
; seq_cst atomic store of 0 to null, without attribute group #0.
; Expected output: the store keeps its value 0; only the alignment changes
; from 4 to 4294967296.
define i32 @test14() {
; CHECK-LABEL: @test14(
; CHECK-NEXT: store atomic i32 0, i32* null seq_cst, align 4294967296
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null seq_cst, align 4
ret i32 0
}
; seq_cst atomic store of 0 to null in a function with attribute group #0
; (null_pointer_is_valid). Expected output: the store keeps its value 0;
; only the alignment changes from 4 to 4294967296.
define i32 @test14_no_null_opt() #0 {
; CHECK-LABEL: @test14_no_null_opt(
; CHECK-NEXT: store atomic i32 0, i32* null seq_cst, align 4294967296
; CHECK-NEXT: ret i32 0
;
store atomic i32 0, i32* null seq_cst, align 4
ret i32 0
}
; Two external i32 globals used as the memory locations in the select-of-
; address and store-in-branch tests (@test15, @test16, @test17, @test22,
; @test23).
@a = external global i32
@b = external global i32
; Unordered atomic load through a select between the addresses of @a and @b.
; Expected output: the select of addresses is replaced by two unordered
; atomic loads (one per global) followed by a select of the loaded values.
define i32 @test15(i1 %cnd) {
; CHECK-LABEL: @test15(
; CHECK-NEXT: [[A_VAL:%.*]] = load atomic i32, i32* @a unordered, align 4
; CHECK-NEXT: [[B_VAL:%.*]] = load atomic i32, i32* @b unordered, align 4
; CHECK-NEXT: [[X:%.*]] = select i1 [[CND:%.*]], i32 [[A_VAL]], i32 [[B_VAL]]
; CHECK-NEXT: ret i32 [[X]]
;
%addr = select i1 %cnd, i32* @a, i32* @b
%x = load atomic i32, i32* %addr unordered, align 4
ret i32 %x
}
; FIXME: This would be legal to transform
; Monotonic atomic load through a select between the addresses of @a and @b.
; Expected output: identical to the input (select of addresses, then one
; monotonic load).
define i32 @test16(i1 %cnd) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[ADDR]] monotonic, align 4
; CHECK-NEXT: ret i32 [[X]]
;
%addr = select i1 %cnd, i32* @a, i32* @b
%x = load atomic i32, i32* %addr monotonic, align 4
ret i32 %x
}
; FIXME: This would be legal to transform
; seq_cst atomic load through a select between the addresses of @a and @b.
; Expected output: identical to the input (select of addresses, then one
; seq_cst load).
define i32 @test17(i1 %cnd) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CND:%.*]], i32* @a, i32* @b
; CHECK-NEXT: [[X:%.*]] = load atomic i32, i32* [[ADDR]] seq_cst, align 4
; CHECK-NEXT: ret i32 [[X]]
;
%addr = select i1 %cnd, i32* @a, i32* @b
%x = load atomic i32, i32* %addr seq_cst, align 4
ret i32 %x
}
; Each arm of a conditional branch performs an unordered atomic store of a
; different constant (1 or 2) to @a before jumping to a common block.
; Expected output: both arms become empty branches and the common block
; holds a single unordered atomic store of a phi selecting 1 or 2.
define i32 @test22(i1 %cnd) {
; CHECK-LABEL: @test22(
; CHECK-NEXT: br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK: block1:
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: block2:
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 2, [[BLOCK2]] ], [ 1, [[BLOCK1]] ]
; CHECK-NEXT: store atomic i32 [[STOREMERGE]], i32* @a unordered, align 4
; CHECK-NEXT: ret i32 0
;
br i1 %cnd, label %block1, label %block2
block1:
store atomic i32 1, i32* @a unordered, align 4
br label %merge
block2:
store atomic i32 2, i32* @a unordered, align 4
br label %merge
merge:
ret i32 0
}
; TODO: probably also legal here
; Same control flow as @test22 but the two stores to @a are monotonic.
; Expected output: identical to the input; each store stays in its own
; branch arm.
define i32 @test23(i1 %cnd) {
; CHECK-LABEL: @test23(
; CHECK-NEXT: br i1 [[CND:%.*]], label [[BLOCK1:%.*]], label [[BLOCK2:%.*]]
; CHECK: block1:
; CHECK-NEXT: store atomic i32 1, i32* @a monotonic, align 4
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: block2:
; CHECK-NEXT: store atomic i32 2, i32* @a monotonic, align 4
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: ret i32 0
;
br i1 %cnd, label %block1, label %block2
block1:
store atomic i32 1, i32* @a monotonic, align 4
br label %merge
block2:
store atomic i32 2, i32* @a monotonic, align 4
br label %merge
merge:
ret i32 0
}
; External function with no visible body; @test18 and @test19 call it
; between a load and the store of the loaded value.
declare void @clobber()
; Unordered atomic float load, a call to @clobber, then an unordered atomic
; store of the loaded value back to the same pointer.
; Expected output: the same load/call/store sequence, still typed float.
define i32 @test18(float* %p) {
; CHECK-LABEL: @test18(
; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4
; CHECK-NEXT: call void @clobber()
; CHECK-NEXT: store atomic float [[X]], float* [[P]] unordered, align 4
; CHECK-NEXT: ret i32 0
;
%x = load atomic float, float* %p unordered, align 4
call void @clobber() ;; keep the load around
store atomic float %x, float* %p unordered, align 4
ret i32 0
}
; TODO: probably also legal in this case
; seq_cst variant of @test18: atomic float load, call to @clobber, atomic
; store of the loaded value back to the same pointer.
; Expected output: the same load/call/store sequence, still typed float.
define i32 @test19(float* %p) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: call void @clobber()
; CHECK-NEXT: store atomic float [[X]], float* [[P]] seq_cst, align 4
; CHECK-NEXT: ret i32 0
;
%x = load atomic float, float* %p seq_cst, align 4
call void @clobber() ;; keep the load around
store atomic float %x, float* %p seq_cst, align 4
ret i32 0
}
; Unordered atomic store of a pointer value that was bitcast from i8* to
; i32*. Expected output: the bitcast moves to the destination pointer
; (i32** to i8**) and the original i8* value is stored directly, still as
; an unordered atomic store.
define i32 @test20(i32** %p, i8* %v) {
; CHECK-LABEL: @test20(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32** [[P:%.*]] to i8**
; CHECK-NEXT: store atomic i8* [[V:%.*]], i8** [[TMP1]] unordered, align 4
; CHECK-NEXT: ret i32 0
;
%cast = bitcast i8* %v to i32*
store atomic i32* %cast, i32** %p unordered, align 4
ret i32 0
}
; Monotonic variant of @test20: atomic store of a pointer value bitcast from
; i8* to i32*. Expected output: identical to the input; the bitcast stays on
; the stored value.
define i32 @test21(i32** %p, i8* %v) {
; CHECK-LABEL: @test21(
; CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[V:%.*]] to i32*
; CHECK-NEXT: store atomic i32* [[CAST]], i32** [[P:%.*]] monotonic, align 4
; CHECK-NEXT: ret i32 0
;
%cast = bitcast i8* %v to i32*
store atomic i32* %cast, i32** %p monotonic, align 4
ret i32 0
}
; Plain load of an i8* followed by a volatile store of that value to a
; second pointer; neither instruction has an explicit alignment.
; Expected output: the same load and volatile store, each now printed with
; align 8.
; NOTE(review): the name suggests a regression test for bug PR27490 --
; confirm against the bug tracker.
define void @pr27490a(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490a(
; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
; CHECK-NEXT: store volatile i8* [[L]], i8** [[P2:%.*]], align 8
; CHECK-NEXT: ret void
;
%l = load i8*, i8** %p1
store volatile i8* %l, i8** %p2
ret void
}
; Plain load of an i8* (no explicit alignment) followed by a seq_cst atomic
; store of that value to a second pointer.
; Expected output: the same load and seq_cst store; the load is now printed
; with align 8.
; NOTE(review): the name suggests a regression test for bug PR27490 --
; confirm against the bug tracker.
define void @pr27490b(i8** %p1, i8** %p2) {
; CHECK-LABEL: @pr27490b(
; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
; CHECK-NEXT: store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8
; CHECK-NEXT: ret void
;
%l = load i8*, i8** %p1
store atomic i8* %l, i8** %p2 seq_cst, align 8
ret void
}
;; At the moment, we can't form atomic vectors by folding since these are
;; not representable in the IR. This was pr29121. The right long term
;; solution is to extend the IR to handle this case.
; Unordered atomic i64 load whose result is bitcast to <2 x float>.
; Expected output: identical to the input; the load stays an i64 load and
; the bitcast is not folded into it.
define <2 x float> @no_atomic_vector_load(i64* %p) {
; CHECK-LABEL: @no_atomic_vector_load(
; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, i64* [[P:%.*]] unordered, align 8
; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast i64 [[LOAD]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[DOTCAST]]
;
%load = load atomic i64, i64* %p unordered, align 8
%.cast = bitcast i64 %load to <2 x float>
ret <2 x float> %.cast
}
; A <2 x float> value is bitcast to i64 and an i8* is bitcast to i64*, then
; the i64 is written with an unordered atomic store.
; Expected output: both bitcasts and the i64 atomic store are kept; the
; store is not rewritten to store the vector directly.
define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
; CHECK-LABEL: @no_atomic_vector_store(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[P:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[P2:%.*]] to i64*
; CHECK-NEXT: store atomic i64 [[TMP1]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT: ret void
;
%1 = bitcast <2 x float> %p to i64
%2 = bitcast i8* %p2 to i64*
store atomic i64 %1, i64* %2 unordered, align 8
ret void
}
; Attribute group #0, attached to every *_no_null_opt function in this file:
; marks address 0 as a valid address for loads and stores in those functions.
attributes #0 = { null_pointer_is_valid }