Files
clang-p2996/llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll
Alexey Lapshin 10c2e26159 [TRE] Reland: allow TRE for non-capturing calls.
The D82085 "allow TRE for non-capturing calls" caused failure during bootstrap.
This patch does the same as D82085 plus fixes bootstrap error.

The problem with D82085 is that it does not create copies of byval
operands when replacing a function call with a branch.

Consider the following example:

```
    int zoo ( S p1 );

    int foo ( int count, S p1 ) {
      if ( count > 10 )
        return zoo(p1);

      // A temporary variable created for passing the byval parameter
      // p1 could be used when zoo(p1) is called (after TRE is done).
      // lifetime.start p1.byvalue.temp
      return foo(count+1, p1);
      // lifetime.end p1.byvalue.temp
    }
```

After the recursive call to foo is replaced with a jump to the
start of the function, its arguments could be passed to the
zoo function, i.e. the temporary variable created for the byval
parameter "p1" could be passed to zoo. As a result, zoo receives
a broken operand:

```
    int foo ( int count, S p1 ) {
    :tailrecurse
      p1_tr = phi p1, p1.byvalue.temp
      if ( count > 10 )
        return zoo(p1_tr);

      // A temporary variable created for passing the byval parameter
      // p1 could be used when zoo(p1) is called (after TRE is done).
      lifetime.start p1.byvalue.temp
      memcpy (p1.byvalue.temp, p1_tr)
      count = count + 1
      lifetime.end p1.byvalue.temp
      br tailrecurse
    }
```

To prevent p1.byvalue.temp from being used after its scope has been
ended by the lifetime.end marker, this patch copies the value from
p1.byvalue.temp into another temporary variable and then copies that
variable into the input parameter for the next iteration.

This patch passes bootstrap build and bootstrap build with AddressSanitizer.

Differential Revision: https://reviews.llvm.org/D85614
2021-05-25 11:35:48 +03:00

118 lines
6.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s
; the test was generated from the following C++ source:
;
; int zoo ( S p1 );
;
; int foo ( int count, S p1 ) {
; if ( count > 10 )
; return zoo(p1);
;
; // After TRE: a temporary variable created for passing the byval parameter
; // p1 could be used when zoo(p1) is called.
; return foo(count+1, p1);
; }
; This test checks that the value of the byval operand AGG_TMP_I of the call
; site foo() is copied into the temporary variable AGG_TMP_I1 (the byval value
; holder) and that the value from AGG_TMP_I1 is later copied into the function
; argument P1 before the new iteration starts.
; Aggregate that @_Z3fooi1S and @_Z3zoo1S receive by value. With the nested
; %struct.B flattened it holds five scalar fields (i32, i32, float, i32, float).
; NOTE(review): presumably 20 bytes, which matches the `i64 20` size used by the
; memcpy and lifetime calls below; no datalayout is given in this file.
%struct.S = type { i32, i32, float, %struct.B }
; Nested member type used as the last field of %struct.S.
%struct.B = type { i32, float }
; Function Attrs: uwtable
define dso_local i32 @_Z3fooi1S(i32 %count, %struct.S* nocapture readonly byval(%struct.S) align 8 %p1) local_unnamed_addr #0 {
; CHECK-LABEL: @_Z3fooi1S(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AGG_TMP_I1:%.*]] = alloca [[STRUCT_S:%.*]], align 8
; CHECK-NEXT: [[AGG_TMP_I:%.*]] = alloca [[STRUCT_S]], align 8
; CHECK-NEXT: [[AGG_TMP14:%.*]] = alloca [[STRUCT_S]], align 8
; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_S]], align 8
; CHECK-NEXT: [[AGG_TMP1:%.*]] = alloca [[STRUCT_S]], align 8
; CHECK-NEXT: br label [[TAILRECURSE:%.*]]
; CHECK: tailrecurse:
; CHECK-NEXT: [[COUNT_TR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[COUNT_TR]], 10
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.S* [[AGG_TMP]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[P1:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP0]], i8* nonnull align 8 dereferenceable(20) [[TMP1]], i64 20, i1 false)
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @_Z3zoo1S(%struct.S* nonnull byval(%struct.S) align 8 [[AGG_TMP]])
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[ADD]] = add nsw i32 [[COUNT_TR]], 1
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[AGG_TMP1]] to i8*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[P1]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP2]], i8* nonnull align 8 dereferenceable(20) [[TMP3]], i64 20, i1 false)
; CHECK-NEXT: [[AGG_TMP14_0__SROA_CAST:%.*]] = bitcast %struct.S* [[AGG_TMP14]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull [[AGG_TMP14_0__SROA_CAST]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[AGG_TMP_I]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull [[TMP4]])
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[AGG_TMP14_0__SROA_CAST]], i8* nonnull align 8 dereferenceable(20) [[TMP2]], i64 20, i1 false)
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP4]], i8* nonnull align 8 dereferenceable(20) [[AGG_TMP14_0__SROA_CAST]], i64 20, i1 false)
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.S* [[AGG_TMP_I1]] to i8*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.S* [[AGG_TMP_I]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 20, i1 false)
; CHECK-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[P1]] to i8*
; CHECK-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[AGG_TMP_I1]] to i8*
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 [[TMP8]], i64 20, i1 false)
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull [[AGG_TMP14_0__SROA_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull [[TMP4]])
; CHECK-NEXT: br label [[TAILRECURSE]]
; CHECK: return:
; CHECK-NEXT: ret i32 [[CALL]]
;
; Everything below is the input IR fed to the pass. The assertions above
; describe the expected output: the self-recursive call in if.end becomes a
; branch back to a new "tailrecurse" block, one extra alloca (AGG_TMP_I1) is
; added, and two extra memcpy calls move the byval operand through that holder
; into %p1 before the lifetime.end markers.
entry:
; Four stack temporaries of type %struct.S used for the by-value copies below.
%agg.tmp.i = alloca %struct.S, align 8
%agg.tmp14 = alloca %struct.S, align 8
%agg.tmp = alloca %struct.S, align 8
%agg.tmp1 = alloca %struct.S, align 8
; Signed comparison count > 10 selects between calling zoo and recursing.
%cmp = icmp sgt i32 %count, 10
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Copy the incoming byval argument %p1 into %agg.tmp and pass that copy to zoo.
%0 = bitcast %struct.S* %agg.tmp to i8*
%1 = bitcast %struct.S* %p1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %0, i8* nonnull align 8 dereferenceable(20) %1, i64 20, i1 false)
%call = call i32 @_Z3zoo1S(%struct.S* nonnull byval(%struct.S) align 8 %agg.tmp)
br label %return
if.end: ; preds = %entry
; Recursive path: compute count + 1 and copy %p1 into %agg.tmp1.
%add = add nsw i32 %count, 1
%2 = bitcast %struct.S* %agg.tmp1 to i8*
%3 = bitcast %struct.S* %p1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %2, i8* nonnull align 8 dereferenceable(20) %3, i64 20, i1 false)
; Open the lifetimes of %agg.tmp14 and %agg.tmp.i, then chain the copies
; %agg.tmp1 -> %agg.tmp14 -> %agg.tmp.i.
%agg.tmp14.0..sroa_cast = bitcast %struct.S* %agg.tmp14 to i8*
call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %agg.tmp14.0..sroa_cast)
%4 = bitcast %struct.S* %agg.tmp.i to i8*
call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %4)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %agg.tmp14.0..sroa_cast, i8* nonnull align 8 dereferenceable(20) %2, i64 20, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %4, i8* nonnull align 8 dereferenceable(20) %agg.tmp14.0..sroa_cast, i64 20, i1 false)
; Self-recursive call with %agg.tmp.i as the byval operand. Only lifetime.end
; markers and the branch to %return follow it; this is the call the expected
; output replaces with a branch.
%call.i = call i32 @_Z3fooi1S(i32 %add, %struct.S* nonnull byval(%struct.S) align 8 %agg.tmp.i)
call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %agg.tmp14.0..sroa_cast)
call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %4)
br label %return
return: ; preds = %if.end, %if.then
; Merge the result of either call and return it.
%retval.0 = phi i32 [ %call, %if.then ], [ %call.i, %if.end ]
ret i32 %retval.0
}
; External function that takes %struct.S by value; it is called from the
; if.then block of @_Z3fooi1S and has no body in this file.
declare dso_local i32 @_Z3zoo1S(%struct.S* byval(%struct.S) align 8) local_unnamed_addr #1
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2
; Attribute groups: #0 is attached to @_Z3fooi1S, #1 to @_Z3zoo1S, and #2 to
; the three intrinsic declarations above.
attributes #0 = { uwtable }
attributes #1 = { uwtable }
attributes #2 = { argmemonly nounwind willreturn }