Previously the inliner always produced a memcpy with alignment 1 for both the source and the destination, leading to potentially suboptimal codegen. Since the source pointer alignment is only available through the CallBase, it has to be passed to HandleByValArgumentInit. The destination alignment is already known, so it doesn't have to be passed along. If there is no specified source alignment, my change causes the pointer to have no align data attached instead of align 1 as before (see inline-tail.ll); I believe this is fine, but since I'm a first-time contributor, please confirm. My changes are already covered by 4 existing regression tests, so I did not add any additional ones. The example from #45778 now results in: ```llvm ; opt -S -passes=inline,instcombine,sroa,instcombine test.ll define dso_local i32 @test(ptr %t) { entry: %.sroa.0.0.copyload = load ptr, ptr %t, align 8 ; this used to be align 1 in the original issue %arrayidx.i = getelementptr inbounds nuw i8, ptr %.sroa.0.0.copyload, i64 24 %0 = load i32, ptr %arrayidx.i, align 4 ret i32 %0 } ``` Fixes #45778.
240 lines
8.0 KiB
LLVM
240 lines
8.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=inline -S | FileCheck %s

; We have to apply the less restrictive TailCallKind of the call site being
; inlined and any call sites cloned into the caller.
; No tail marker after inlining, since test_capture_c captures an alloca.
declare void @test_capture_c(ptr)

define internal void @test_capture_b(ptr %P) {
  tail call void @test_capture_c(ptr %P)
  ret void
}

define void @test_capture_a() {
; CHECK-LABEL: define void @test_capture_a() {
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    call void @test_capture_c(ptr [[A]])
; CHECK-NEXT:    ret void
;
  %A = alloca i32 ; captured by test_capture_b
  call void @test_capture_b(ptr %A)
  ret void
}
; No musttail marker after inlining, since the prototypes don't match.
declare void @test_proto_mismatch_c(ptr)

define internal void @test_proto_mismatch_b(ptr %p) {
  musttail call void @test_proto_mismatch_c(ptr %p)
  ret void
}

define void @test_proto_mismatch_a() {
; CHECK-LABEL: define void @test_proto_mismatch_a() {
; CHECK-NEXT:    call void @test_proto_mismatch_c(ptr null)
; CHECK-NEXT:    ret void
;
  call void @test_proto_mismatch_b(ptr null)
  ret void
}
; After inlining through a musttail call site, we need to keep musttail markers
; to prevent unbounded stack growth.
declare void @test_musttail_basic_c(ptr %p)

define internal void @test_musttail_basic_b(ptr %p) {
  musttail call void @test_musttail_basic_c(ptr %p)
  ret void
}

define void @test_musttail_basic_a(ptr %p) {
; CHECK-LABEL: define void @test_musttail_basic_a
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT:    musttail call void @test_musttail_basic_c(ptr [[P]])
; CHECK-NEXT:    ret void
;
  musttail call void @test_musttail_basic_b(ptr %p)
  ret void
}
; Don't insert lifetime end markers here, the lifetime is trivially over due
; to the return.
declare void @test_byval_c(ptr byval(i32) %p)

define internal void @test_byval_b(ptr byval(i32) %p) {
  musttail call void @test_byval_c(ptr byval(i32) %p)
  ret void
}

define void @test_byval_a(ptr byval(i32) %p) {
; CHECK-LABEL: define void @test_byval_a
; CHECK-SAME: (ptr byval(i32) [[P:%.*]]) {
; CHECK-NEXT:    [[P1:%.*]] = alloca i32, align 4
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
; CHECK-NEXT:    musttail call void @test_byval_c(ptr byval(i32) [[P1]])
; CHECK-NEXT:    ret void
;
  musttail call void @test_byval_b(ptr byval(i32) %p)
  ret void
}
; Don't insert a stack restore, we're about to return.
declare void @escape(ptr %buf)
declare void @test_dynalloca_c(ptr byval(i32) %p, i32 %n)

define internal void @test_dynalloca_b(ptr byval(i32) %p, i32 %n) alwaysinline {
  %buf = alloca i8, i32 %n ; dynamic alloca
  call void @escape(ptr %buf) ; escape it
  musttail call void @test_dynalloca_c(ptr byval(i32) %p, i32 %n)
  ret void
}

define void @test_dynalloca_a(ptr byval(i32) %p, i32 %n) {
; CHECK-LABEL: define void @test_dynalloca_a
; CHECK-SAME: (ptr byval(i32) [[P:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT:    [[P1:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
; CHECK-NEXT:    [[BUF_I:%.*]] = alloca i8, i32 [[N]], align 1
; CHECK-NEXT:    call void @escape(ptr [[BUF_I]])
; CHECK-NEXT:    musttail call void @test_dynalloca_c(ptr byval(i32) [[P1]], i32 [[N]])
; CHECK-NEXT:    ret void
;
  musttail call void @test_dynalloca_b(ptr byval(i32) %p, i32 %n)
  ret void
}
; We can't merge the returns.
declare void @test_multiret_c(i1 zeroext %b)
declare void @test_multiret_d(i1 zeroext %b)

define internal void @test_multiret_b(i1 zeroext %b) {
  br i1 %b, label %c, label %d
c:
  musttail call void @test_multiret_c(i1 zeroext %b)
  ret void
d:
  musttail call void @test_multiret_d(i1 zeroext %b)
  ret void
}

define void @test_multiret_a(i1 zeroext %b) {
; CHECK-LABEL: define void @test_multiret_a
; CHECK-SAME: (i1 zeroext [[B:%.*]]) {
; CHECK-NEXT:    br i1 [[B]], label [[C_I:%.*]], label [[D_I:%.*]]
; CHECK:       c.i:
; CHECK-NEXT:    musttail call void @test_multiret_c(i1 zeroext [[B]])
; CHECK-NEXT:    ret void
; CHECK:       d.i:
; CHECK-NEXT:    musttail call void @test_multiret_d(i1 zeroext [[B]])
; CHECK-NEXT:    ret void
;
  musttail call void @test_multiret_b(i1 zeroext %b)
  ret void
}
; We have to avoid bitcast chains.
declare ptr @test_retptr_c()

define internal ptr @test_retptr_b() {
  %rv = musttail call ptr @test_retptr_c()
  ret ptr %rv
}

define ptr @test_retptr_a() {
; CHECK-LABEL: define ptr @test_retptr_a() {
; CHECK-NEXT:    [[RV_I:%.*]] = musttail call ptr @test_retptr_c()
; CHECK-NEXT:    ret ptr [[RV_I]]
;
  %rv = musttail call ptr @test_retptr_b()
  ret ptr %rv
}
; Combine the last two cases: multiple returns with pointer bitcasts.
declare ptr @test_multiptrret_c(i1 zeroext %b)
declare ptr @test_multiptrret_d(i1 zeroext %b)

define internal ptr @test_multiptrret_b(i1 zeroext %b) {
  br i1 %b, label %c, label %d
c:
  %c_rv = musttail call ptr @test_multiptrret_c(i1 zeroext %b)
  ret ptr %c_rv
d:
  %d_rv = musttail call ptr @test_multiptrret_d(i1 zeroext %b)
  ret ptr %d_rv
}

define ptr @test_multiptrret_a(i1 zeroext %b) {
; CHECK-LABEL: define ptr @test_multiptrret_a
; CHECK-SAME: (i1 zeroext [[B:%.*]]) {
; CHECK-NEXT:    br i1 [[B]], label [[C_I:%.*]], label [[D_I:%.*]]
; CHECK:       c.i:
; CHECK-NEXT:    [[C_RV_I:%.*]] = musttail call ptr @test_multiptrret_c(i1 zeroext [[B]])
; CHECK-NEXT:    ret ptr [[C_RV_I]]
; CHECK:       d.i:
; CHECK-NEXT:    [[D_RV_I:%.*]] = musttail call ptr @test_multiptrret_d(i1 zeroext [[B]])
; CHECK-NEXT:    ret ptr [[D_RV_I]]
;
  %rv = musttail call ptr @test_multiptrret_b(i1 zeroext %b)
  ret ptr %rv
}
; Inline a musttail call site which contains a normal return and a musttail call.
declare i32 @test_mixedret_c(i1 zeroext %b)
declare i32 @test_mixedret_d(i1 zeroext %b)

define internal i32 @test_mixedret_b(i1 zeroext %b) {
  br i1 %b, label %c, label %d
c:
  %c_rv = musttail call i32 @test_mixedret_c(i1 zeroext %b)
  ret i32 %c_rv
d:
  %d_rv = call i32 @test_mixedret_d(i1 zeroext %b)
  %d_rv1 = add i32 1, %d_rv
  ret i32 %d_rv1
}

define i32 @test_mixedret_a(i1 zeroext %b) {
; CHECK-LABEL: define i32 @test_mixedret_a
; CHECK-SAME: (i1 zeroext [[B:%.*]]) {
; CHECK-NEXT:    br i1 [[B]], label [[C_I:%.*]], label [[TEST_MIXEDRET_B_EXIT:%.*]]
; CHECK:       c.i:
; CHECK-NEXT:    [[C_RV_I:%.*]] = musttail call i32 @test_mixedret_c(i1 zeroext [[B]])
; CHECK-NEXT:    ret i32 [[C_RV_I]]
; CHECK:       test_mixedret_b.exit:
; CHECK-NEXT:    [[D_RV_I:%.*]] = call i32 @test_mixedret_d(i1 zeroext [[B]])
; CHECK-NEXT:    [[D_RV1_I:%.*]] = add i32 1, [[D_RV_I]]
; CHECK-NEXT:    ret i32 [[D_RV1_I]]
;
  %rv = musttail call i32 @test_mixedret_b(i1 zeroext %b)
  ret i32 %rv
}
declare i32 @donttailcall()

define i32 @notail() {
; CHECK-LABEL: define i32 @notail() {
; CHECK-NEXT:    [[RV:%.*]] = notail call i32 @donttailcall()
; CHECK-NEXT:    ret i32 [[RV]]
;
  %rv = notail call i32 @donttailcall()
  ret i32 %rv
}

define i32 @test_notail() {
; CHECK-LABEL: define i32 @test_notail() {
; CHECK-NEXT:    [[RV_I:%.*]] = notail call i32 @donttailcall()
; CHECK-NEXT:    ret i32 [[RV_I]]
;
  %rv = tail call i32 @notail()
  ret i32 %rv
}
; PR31014: Inlining a musttail call through a notail call site should remove
; any tail marking, otherwise we break verifier invariants.

declare void @do_ret(i32)

define void @test_notail_inline_musttail(i32 %a) {
; CHECK-LABEL: define void @test_notail_inline_musttail
; CHECK-SAME: (i32 [[A:%.*]]) {
; CHECK-NEXT:    call void @do_ret(i32 [[A]])
; CHECK-NEXT:    musttail call void @do_ret(i32 [[A]])
; CHECK-NEXT:    ret void
;
  notail call void @inline_musttail(i32 %a)
  musttail call void @do_ret(i32 %a)
  ret void
}

define internal void @inline_musttail(i32 %a) {
  musttail call void @do_ret(i32 %a)
  ret void
}