Files
clang-p2996/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
Sanjay Patel bfb9b8e075 [Passes] add a tail-call-elim pass near the end of the opt pipeline
We call tail-call-elim near the beginning of the pipeline,
but that is too early to annotate calls that get added later.

In the motivating case from issue #47852, the missing 'tail'
on memset leads to sub-optimal codegen.

I experimented with removing the early instance of
tail-call-elim instead of just adding another pass, but that
appears to be slightly worse for compile-time:
+0.15% vs. +0.08% time.
"tailcall" shows adding the pass; "tailcall2" shows moving
the pass to later, then adding the original early pass back
(so 1596886802 is functionally equivalent to 180b0439dc):
https://llvm-compile-time-tracker.com/index.php?config=NewPM-O3&stat=instructions&remote=rotateright

Note that there was an effort to split the tail call functionality
into 2 passes - that could help reduce compile-time if we find
that this change costs more in compile-time than expected based
on the preliminary testing:
https://reviews.llvm.org/D60031

Differential Revision: https://reviews.llvm.org/D130374
2022-07-25 15:25:47 -04:00

67 lines
3.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -O2 < %s | FileCheck %s
; Test a single-iteration loop that should get SROAd once we realize that fact.
; It should compile down to a bswap.
; The helper function exists to avoid IPSCCP breaking the loop too early.
; @helper(i16 %0, i64 %x): spills %0 into a 2-byte stack buffer, runs a loop
; that swaps one byte addressed upward from the front of the buffer with one
; addressed downward from its last byte, then returns the buffer reloaded as an
; i16. The loop exits once the incremented counter equals %x, so the trip count
; is only known when the caller passes a constant. The expected output below
; shows the loop still present when this function is optimized on its own.
define i16 @helper(i16 %0, i64 %x) {
; CHECK-LABEL: @helper(
; CHECK-NEXT: start:
; CHECK-NEXT: [[DATA:%.*]] = alloca [2 x i8], align 2
; CHECK-NEXT: store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
; CHECK-NEXT: br label [[BB6_I_I:%.*]]
; CHECK: bb6.i.i:
; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
; CHECK-NEXT: [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
; CHECK-NEXT: [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
; CHECK-NEXT: [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
; CHECK-NEXT: [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]]
; CHECK-NEXT: [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1
; CHECK-NEXT: [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1
; CHECK-NEXT: store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1
; CHECK-NEXT: store i8 [[TMP_0_COPYLOAD_I_I_I_I]], ptr [[_39_I_I]], align 1
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP2]], [[X:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT:%.*]], label [[BB6_I_I]]
; CHECK: exit:
; CHECK-NEXT: [[DOTSROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[DATA]], align 2
; CHECK-NEXT: ret i16 [[DOTSROA_0_0_COPYLOAD]]
;
start:
; Spill the i16 argument into a 2-byte buffer so it can be addressed per byte.
%data = alloca [2 x i8], align 2
store i16 %0, ptr %data, align 2
; %1 is %data + 2 (one past the end); %2 steps back by one, i.e. the last byte.
%1 = getelementptr inbounds i8, ptr %data, i64 2
%2 = getelementptr inbounds i8, ptr %1, i64 -1
br label %bb6.i.i
bb6.i.i:
; Loop counter: 0 when entered from start, %4 (counter + 1) on the back edge.
%iter.sroa.0.07.i.i = phi i64 [ %4, %bb6.i.i ], [ 0, %start ]
; (counter xor -1) + 1 is the two's-complement negation, so %_40.i.i == -counter.
%3 = xor i64 %iter.sroa.0.07.i.i, -1
%_40.i.i = add nsw i64 1, %3
%4 = add nuw nsw i64 %iter.sroa.0.07.i.i, 1
; %_34.i.i addresses %data[counter]; %_39.i.i addresses (last byte) - counter.
%_34.i.i = getelementptr inbounds [0 x i8], ptr %data, i64 0, i64 %iter.sroa.0.07.i.i
%_39.i.i = getelementptr inbounds [0 x i8], ptr %2, i64 0, i64 %_40.i.i
; Swap the two addressed bytes; both loads are issued before either store.
%tmp.0.copyload.i.i.i.i = load i8, ptr %_34.i.i, align 1
%tmp2.0.copyload.i.i.i.i = load i8, ptr %_39.i.i, align 1
store i8 %tmp2.0.copyload.i.i.i.i, ptr %_34.i.i, align 1
store i8 %tmp.0.copyload.i.i.i.i, ptr %_39.i.i, align 1
; Leave the loop once the incremented counter equals %x.
%exitcond.not.i.i = icmp eq i64 %4, %x
br i1 %exitcond.not.i.i, label %exit, label %bb6.i.i
exit:
; Reload the whole buffer as one i16 and return it.
%.sroa.0.0.copyload = load i16, ptr %data, align 2
ret i16 %.sroa.0.0.copyload
}
; @test(i16 %arg): calls @helper with the loop bound fixed to 1, so the swap
; loop body runs exactly once (bytes 0 and 1 of the buffer are exchanged).
; The expected -O2 output is a single llvm.bswap.i16 call on %arg, carrying the
; `tail` marker, whose result is returned directly.
define i16 @test(i16 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb6.i.i.i:
; CHECK-NEXT: [[DATA_I_SROA_0_0_INSERT_INSERT:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[ARG:%.*]])
; CHECK-NEXT: ret i16 [[DATA_I_SROA_0_0_INSERT_INSERT]]
;
; The constant 1 is the %x exit bound of @helper's loop.
%ret = call i16 @helper(i16 %arg, i64 1)
ret i16 %ret
}