Currently the max alignment representable is 1GB, see D108661. Setting the align of an object to 4GB is desirable in some cases to make sure the lower 32 bits are clear which can be used for some optimizations, e.g. https://crbug.com/1016945. This uses an extra bit in instructions that carry an alignment. We can store 15 bits of "free" information, and with this change some instructions (e.g. AtomicCmpXchgInst) use 14 bits. We can increase the max alignment representable above 4GB (up to 2^62) since we're only using 33 of the 64 values, but I've just limited it to 4GB for now. The one place we have to update the bitcode format is for the alloca instruction. It stores its alignment into 5 bits of a 32 bit bitfield. I've added another field which is 8 bits and should be future proof for a while. For backward compatibility, we check if the old field has a value and use that, otherwise use the new field. Updating clang's max allowed alignment will come in a future patch. Reviewed By: hans Differential Revision: https://reviews.llvm.org/D110451
68 lines
2.8 KiB
LLVM
68 lines
2.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -instcombine -S < %s | FileCheck %s
|
|
|
|
; Baseline case: mempcpy with a non-constant length %n whose result is returned.
; The assertions below expect an llvm.memcpy of %n bytes with both pointers
; marked align 1, and the returned pointer recomputed as an inbounds GEP of the
; destination advanced by %n.
define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
|
|
; CHECK-LABEL: @memcpy_nonconst_n(
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
|
|
; CHECK-NEXT: ret i8* [[TMP1]]
|
|
;
|
|
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
|
|
ret i8* %r
|
|
}
|
|
|
|
; Same shape as the non-constant-length case, but the call site marks the
; destination argument dereferenceable(16). The assertions below expect that
; attribute to appear on the destination operand of the resulting llvm.memcpy.
define i8* @memcpy_nonconst_n_copy_attrs(i8* %d, i8* nocapture readonly %s, i64 %n) {
|
|
; CHECK-LABEL: @memcpy_nonconst_n_copy_attrs(
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
|
|
; CHECK-NEXT: ret i8* [[TMP1]]
|
|
;
|
|
%r = tail call i8* @mempcpy(i8* dereferenceable(16) %d, i8* %s, i64 %n)
|
|
ret i8* %r
|
|
}
|
|
|
|
; The mempcpy result is discarded and the function returns void. The assertions
; below expect only the llvm.memcpy call followed directly by the return, i.e.
; no end-pointer GEP is expected in the output.
define void @memcpy_nonconst_n_unused_retval(i8* %d, i8* nocapture readonly %s, i64 %n) {
|
|
; CHECK-LABEL: @memcpy_nonconst_n_unused_retval(
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
|
|
ret void
|
|
}
|
|
|
|
; Constant length of 8 bytes. The assertions below expect no memcpy call at
; all: both pointers are bitcast to i64*, one i64 is loaded from the source and
; stored to the destination (each with align 1), and the returned pointer is
; an inbounds GEP of the destination advanced by 8.
define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
|
|
; CHECK-LABEL: @memcpy_small_const_n(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
|
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
|
|
; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
|
|
; CHECK-NEXT: ret i8* [[TMP4]]
|
|
;
|
|
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
|
|
ret i8* %r
|
|
}
|
|
|
|
; Constant length of 1024 bytes. The assertions below expect the copy to stay
; an llvm.memcpy call, with both pointer operands annotated
; noundef nonnull align 1 dereferenceable(1024), and the returned pointer
; recomputed as an inbounds GEP of the destination advanced by 1024.
define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
|
|
; CHECK-LABEL: @memcpy_big_const_n(
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(1024) [[D:%.*]], i8* noundef nonnull align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
|
|
; CHECK-NEXT: ret i8* [[TMP1]]
|
|
;
|
|
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
|
|
ret i8* %r
|
|
}
|
|
|
|
; The original call may carry attributes (here, a dereferenceable(1) return attribute) that cannot propagate to the void-returning memcpy.
|
|
|
|
; Regression test named after PR48810. The mempcpy call carries a
; dereferenceable(1) attribute on its return value and is passed undef, null
; and undef as arguments; its result is unused.
; The assertions below expect a void llvm.memcpy whose destination is
; `align 1 undef` and whose null source is printed with align 4294967296
; (2^32 bytes, i.e. 4 GiB).
; NOTE(review): 4 GiB is presumably the maximum representable alignment being
; applied to the null pointer -- confirm against the alignment-limit change
; described at the top of this file.
define i32 @PR48810() {
|
|
; CHECK-LABEL: @PR48810(
|
|
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 4294967296 null, i64 undef, i1 false)
|
|
; CHECK-NEXT: ret i32 undef
|
|
;
|
|
%r = call dereferenceable(1) i8* @mempcpy(i8* undef, i8* null, i64 undef)
|
|
ret i32 undef
|
|
}
|
|
|
|
; External declaration of the mempcpy callee used by every test function in
; this file: (i8* dest, i8* src, i64 length) -> i8*. The second pointer
; parameter is declared nocapture readonly.
declare i8* @mempcpy(i8*, i8* nocapture readonly, i64)
|