Files
clang-p2996/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling.ll
Roman Lebedev 8adfa29706 [Pipelines] Introduce SROA after (final, run-time) loop unrolling
Now that we are done with loop unrolling, be it by the LoopVectorizer
or the LoopUnroll pass, some variable-offset GEPs into allocas could have
become constant-offset, thus enabling SROA and alloca promotion,
yet we don't capitalize on that, which is surprising.

It would be better not to introduce one more SROA invocation,
and instead to move the one from `PassBuilder::buildFunctionSimplificationPipeline()`,
but the existing test coverage says that is a bad idea,
even though it would be fine compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=b150d34c47efbd8fa09604bce805c0920360f8d7&to=5a9a5c855158b482552be8c7af3e73d67fa44805&stat=instructions

So instead, I add yet another SROA run.
I have checked, and it needs to be at least after said final loop unrolling.
This is still fine compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=70324cd88328c0924e605fa81b696572560aa5c9&to=fb489bbef687ad821c3173a931709f9cad9aee8a&stat=instructions

I've encountered this in real code; `SROA-after-final-loop-unrolling.ll` has been reduced from https://godbolt.org/z/fsdMhETh3

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D136806
2022-11-17 21:31:30 +03:00

114 lines
3.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S | FileCheck %s
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
; Module-level target description and the aggregate types shared by the
; functions below.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; %t0: a pair of pointers. @wibble fills field 0 with its argument and field 1
; with the address of its %t1 alloca; @foo reads both fields back.
%t0 = type { ptr, ptr }
; %t1: wrapper around a 16-element i32 array (the %i1 alloca in @wibble).
%t1 = type { [16 x i32] }
; %t2: a %t3 followed by a pointer; only @pluto indexes through this type.
%t2 = type { %t3, ptr }
; %t3: single-byte struct, used as the first member of %t2.
%t3 = type { i8 }
; @wibble is the function whose optimized form is asserted on.
;
; Input shape: two allocas (%i, a pointer pair; %i1, a 16 x i32 buffer), a
; counted loop that calls @foo(%i, n) for n = 0..15, then one call to
; @baz(%i, %arg).
;
; Expected output (the assertion lines directly below): a single basic block
; with no allocas, no calls and no loop. It loads one i8 from %arg, ORs it
; with 1, zero-extends to i32, adds 1 and stores the i32 back to %arg.
define void @wibble(ptr %arg) personality ptr null {
; CHECK-LABEL: @wibble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I10_3_I_PRE:%.*]] = load i8, ptr [[ARG:%.*]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = or i8 [[I10_3_I_PRE]], 1
; CHECK-NEXT: [[I1_SROA_0_0_VEC_EXTRACT:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[I4_I_I:%.*]] = add nuw nsw i32 [[I1_SROA_0_0_VEC_EXTRACT]], 1
; CHECK-NEXT: store i32 [[I4_I_I]], ptr [[ARG]], align 4
; CHECK-NEXT: ret void
;
bb:
; NOTE(review): %i is declared with a zero-sized nested array type and an
; element count of 0, yet it is written below as if it were a %t0.
; Presumably an artifact of test-case reduction -- confirm before "fixing".
%i = alloca [0 x [0 x [0 x [0 x [0 x [0 x %t0]]]]]], i32 0, align 8
%i1 = alloca %t1, align 4
; Field 0 of the pair <- %arg.
store ptr %arg, ptr %i, align 8
; Field 1 of the pair <- address of the local i32 buffer %i1.
%i2 = getelementptr %t0, ptr %i, i64 0, i32 1
store ptr %i1, ptr %i2, align 8
br label %bb3
; Loop header: %i4 counts 0, 1, ... and the loop body runs while %i4 < 16
; (unsigned compare).
bb3: ; preds = %bb7, %bb
%i4 = phi i32 [ 0, %bb ], [ %i8, %bb7 ]
%i5 = icmp ult i32 %i4, 16
br i1 %i5, label %bb7, label %bb6
; Loop exit: hand the pointer pair and the original argument to @baz.
bb6: ; preds = %bb3
call void @baz(ptr %i, ptr %arg)
ret void
; Loop body: one @foo call per index, then increment.
bb7: ; preds = %bb3
call void @foo(ptr %i, i32 %i4)
%i8 = add i32 %i4, 1
br label %bb3
}
; @hoge: forwards both arguments unchanged to @ham and returns its result.
define linkonce_odr ptr @hoge(ptr %arg, i64 %arg1) {
bb:
%i = call ptr @ham(ptr %arg, i64 %arg1)
ret ptr %i
}
; @foo(%arg, %arg1): %arg points at a %t0-style pointer pair.
;
; Runs a countdown loop with %i3 = 3, 2, 1, 0 (exits once %i3 is no longer
; > -1, signed). Each iteration:
;   - reads an i8 at byte index (%i3 + %arg1) from the pointer held in
;     field 0 of the pair (address computed via @hoge),
;   - ORs it with 1 and zero-extends to i32,
;   - stores that i32 at element index %arg1 of the buffer pointed to by
;     field 1 of the pair (address computed via @foo.1).
; The store address does not depend on %i3, so every iteration writes the
; same element.
define linkonce_odr void @foo(ptr %arg, i32 %arg1) {
bb:
; Field 0 of the pair; loaded once, outside the loop.
%i = load ptr, ptr %arg, align 8
br label %bb2
bb2: ; preds = %bb6, %bb
%i3 = phi i32 [ 3, %bb ], [ %i17, %bb6 ]
%i4 = icmp sgt i32 %i3, -1
br i1 %i4, label %bb6, label %bb5
bb5: ; preds = %bb2
ret void
bb6: ; preds = %bb2
; Source byte: index = %i3 + %arg1, sign-extended to i64.
%i7 = add i32 %i3, %arg1
%i8 = sext i32 %i7 to i64
%i9 = call ptr @hoge(ptr %i, i64 %i8)
%i10 = load i8, ptr %i9, align 1
; Destination: field 1 of the pair is re-loaded on every iteration.
%i11 = getelementptr %t0, ptr %arg, i64 0, i32 1
%i12 = load ptr, ptr %i11, align 8
%i13 = sext i32 %arg1 to i64
%i14 = call ptr @foo.1(ptr %i12, i64 %i13)
%i15 = or i8 %i10, 1
%i16 = zext i8 %i15 to i32
store i32 %i16, ptr %i14, align 4
%i17 = add i32 %i3, -1
br label %bb2
}
; @baz: forwards both arguments unchanged to @pluto.
define linkonce_odr void @baz(ptr %arg, ptr %arg1) {
bb:
call void @pluto(ptr %arg, ptr %arg1)
ret void
}
; @foo.1: forwards both arguments unchanged to @baz.2 and returns its result.
define linkonce_odr ptr @foo.1(ptr %arg, i64 %arg1) {
bb:
%i = call ptr @baz.2(ptr %arg, i64 %arg1)
ret ptr %i
}
; @baz.2: returns the address of element %arg1 of %arg, treating %arg as a
; pointer to [16 x i32]. The index is a run-time value here and the
; getelementptr carries no inbounds flag.
define linkonce_odr ptr @baz.2(ptr %arg, i64 %arg1) {
bb:
%i = getelementptr [16 x i32], ptr %arg, i64 0, i64 %arg1
ret ptr %i
}
; @pluto(%arg, %arg1): loads the pointer stored in field 1 of %arg (viewed as
; a %t2), loads an i32 through it, adds 1 and stores the sum to %arg1.
; NOTE(review): @wibble populates this object through %t0, not %t2;
; presumably field 1 of both types sits at the same offset under this
; datalayout -- confirm.
define linkonce_odr void @pluto(ptr %arg, ptr %arg1) {
bb:
%i = getelementptr %t2, ptr %arg, i64 0, i32 1
%i2 = load ptr, ptr %i, align 8
%i3 = load i32, ptr %i2, align 4
%i4 = add i32 %i3, 1
store i32 %i4, ptr %arg1, align 4
ret void
}
; @ham: returns the address of byte %arg1 of %arg, treating %arg as a pointer
; to [64 x i8]. The index is a run-time value here and the getelementptr
; carries no inbounds flag.
define linkonce_odr ptr @ham(ptr %arg, i64 %arg1) {
bb:
%i = getelementptr [64 x i8], ptr %arg, i64 0, i64 %arg1
ret ptr %i
}