clang-p2996/llvm/test/Transforms/InstCombine/mempcpy.ll
Roman Lebedev 564d85e090 The maximal representable alignment in LLVM IR is 1GiB, not 512MiB
In LLVM IR, `AlignmentBitfieldElementT` is 5 bits wide.
That means the maximal alignment exponent is `(1<<5)-2`,
which is `30`, not `29`. And indeed, an alignment of `1073741824`
round-trips IR serialization and deserialization.
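
As a minimal illustration (a hypothetical snippet, not part of this patch or of the test below;
the assumption here is that one of the 32 encodings of the 5-bit field is reserved for
"no alignment", which is why the largest storable exponent is 30 rather than 31),
the following now round-trips through `llvm-as`/`llvm-dis`:

  define void @max_align_roundtrip(i32* %p) {
    ; align 1073741824 = 1 << 30 bytes = 1GiB, the new maximum
    %v = load i32, i32* %p, align 1073741824
    ret void
  }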

While this doesn't seem all that important, it doubles
the maximal supported alignment from 512MiB to 1GiB,
and there is one noticeable use case for that:
on X86, huge pages can have sizes of 2MiB and 1GiB (!).

So while this doesn't add support for truly huge alignments,
which I think we could add fairly easily if wanted, it does add
zero-cost support for a case that is not trivially dismissable.

I don't believe we need any upgrade infrastructure,
and since we don't explicitly record the IR version,
we don't need to bump it either.

As @craig.topper speculates in D108661#2963519,
the previous limit might have been an artificial one imposed
by the original implementation of the `getAlignment()` functions.

Differential Revision: https://reviews.llvm.org/D108661
2021-08-26 12:53:39 +03:00

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
; CHECK-NEXT: ret i8* [[TMP1]]
;
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
ret i8* %r
}

define i8* @memcpy_nonconst_n_copy_attrs(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n_copy_attrs(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
; CHECK-NEXT: ret i8* [[TMP1]]
;
%r = tail call i8* @mempcpy(i8* dereferenceable(16) %d, i8* %s, i64 %n)
ret i8* %r
}

define void @memcpy_nonconst_n_unused_retval(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n_unused_retval(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT: ret void
;
call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
ret void
}

define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
; CHECK-LABEL: @memcpy_small_const_n(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
; CHECK-NEXT: ret i8* [[TMP4]]
;
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
ret i8* %r
}

define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
; CHECK-LABEL: @memcpy_big_const_n(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(1024) [[D:%.*]], i8* noundef nonnull align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
; CHECK-NEXT: ret i8* [[TMP1]]
;
%r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
ret i8* %r
}

; The original call may have attributes that cannot propagate to memcpy.
define i32 @PR48810() {
; CHECK-LABEL: @PR48810(
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1073741824 null, i64 undef, i1 false)
; CHECK-NEXT: ret i32 undef
;
%r = call dereferenceable(1) i8* @mempcpy(i8* undef, i8* null, i64 undef)
ret i32 undef
}

declare i8* @mempcpy(i8*, i8* nocapture readonly, i64)