In LLVM IR, `AlignmentBitfieldElementT` is 5 bits wide. That means the maximal alignment exponent is `(1<<5)-2`, which is `30`, not `29`. And indeed, an alignment of `1073741824` round-trips IR serialization/deserialization. While this doesn't seem all that important, it doubles the maximal supported alignment from 512 MiB to 1 GiB, and there is actually one noticeable use case for that: on X86, huge pages can have sizes of 2 MiB and 1 GiB (!). So while this doesn't add support for truly huge alignments — which I think we could do fairly easily if wanted — it adds zero-cost support for a case that is not trivially dismissable. I don't believe we need any upgrade infrastructure, and since we don't explicitly record the IR version, we don't need to bump one either. As @craig.topper speculates in D108661#2963519, this might be an artificial limit imposed by the original implementation of the `getAlignment()` functions. Differential Revision: https://reviews.llvm.org/D108661
68 lines
2.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; mempcpy with a variable length is lowered to llvm.memcpy plus a GEP of
; d + n (mempcpy returns a pointer one past the last written byte).
define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
; CHECK-NEXT:    ret i8* [[TMP1]]
;
  %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
  ret i8* %r
}

; Parameter attributes on the original call (dereferenceable) are carried
; over onto the corresponding llvm.memcpy argument.
define i8* @memcpy_nonconst_n_copy_attrs(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n_copy_attrs(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
; CHECK-NEXT:    ret i8* [[TMP1]]
;
  %r = tail call i8* @mempcpy(i8* dereferenceable(16) %d, i8* %s, i64 %n)
  ret i8* %r
}

; When the mempcpy return value is unused, no GEP is emitted — just the
; llvm.memcpy call.
define void @memcpy_nonconst_n_unused_retval(i8* %d, i8* nocapture readonly %s, i64 %n) {
; CHECK-LABEL: @memcpy_nonconst_n_unused_retval(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
; CHECK-NEXT:    ret void
;
  call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
  ret void
}

; A small constant length (8 bytes) is expanded further: the memcpy is
; turned into a single i64 load/store pair plus the d + 8 GEP.
define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
; CHECK-LABEL: @memcpy_small_const_n(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP2]], align 1
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
; CHECK-NEXT:    ret i8* [[TMP4]]
;
  %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
  ret i8* %r
}

; A large constant length stays a memcpy call; the constant size lets the
; pass add noundef/nonnull/dereferenceable(1024) to both pointer arguments.
define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
; CHECK-LABEL: @memcpy_big_const_n(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(1024) [[D:%.*]], i8* noundef nonnull align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
; CHECK-NEXT:    ret i8* [[TMP1]]
;
  %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
  ret i8* %r
}

; The original call may have attributes that cannot propagate to memcpy.

; PR48810: the dereferenceable(1) return attribute on the original call must
; not be propagated. Note the `align 1073741824` (2^30) on the null source —
; this exercises the maximal alignment exponent (1<<5)-2 = 30 allowed by the
; 5-bit alignment bitfield; see D108661.
define i32 @PR48810() {
; CHECK-LABEL: @PR48810(
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1073741824 null, i64 undef, i1 false)
; CHECK-NEXT:    ret i32 undef
;
  %r = call dereferenceable(1) i8* @mempcpy(i8* undef, i8* null, i64 undef)
  ret i32 undef
}

; void *mempcpy(void *dst, const void *src, size_t n) — GNU libc extension.
declare i8* @mempcpy(i8*, i8* nocapture readonly, i64)