Now that we are done with loop unrolling, whether by the LoopVectorizer or the LoopUnroll pass, some variable-offset GEPs into allocas could have become constant-offset, thus enabling SROA and alloca promotion, yet we don't capitalize on that, which is surprising. It would be good not to introduce one more SROA invocation and instead move the one from `PassBuilder::buildFunctionSimplificationPipeline()`, but the existing test coverage says that is a bad idea, even though it would be fine compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=b150d34c47efbd8fa09604bce805c0920360f8d7&to=5a9a5c855158b482552be8c7af3e73d67fa44805&stat=instructions So instead, I add yet another SROA run. I have checked, and it needs to be at least after said final loop unrolling. This is still fine compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=70324cd88328c0924e605fa81b696572560aa5c9&to=fb489bbef687ad821c3173a931709f9cad9aee8a&stat=instructions I've encountered this in real code; `SROA-after-final-loop-unrolling.ll` has been reduced from https://godbolt.org/z/fsdMhETh3 Reviewed By: spatel. Differential Revision: https://reviews.llvm.org/D136806
114 lines
3.2 KiB
LLVM
114 lines
3.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -O3 -S | FileCheck %s
|
|
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-pc-linux-gnu"
|
|
|
|
; Named aggregate types used by the reproducer below.
; %t0: two pointer slots. @wibble fills both and @foo reads both.
%t0 = type { ptr, ptr }
|
|
; %t1: wrapper around a 16-element i32 array; alloca'd as a local in @wibble.
%t1 = type { [16 x i32] }
|
|
; %t2: a %t3 followed by a pointer; only used to address field 1 in @pluto.
%t2 = type { %t3, ptr }
|
|
; %t3: single-byte struct; only appears as the first field of %t2.
%t3 = type { i8 }
|
|
|
|
; Entry point of the reduced reproducer.
; Builds a %t0-shaped record in %i (slot 0 = %arg, slot 1 = address of the
; local %t1 buffer %i1), calls @foo(%i, n) for n = 0..15, then passes the
; record and %arg to @baz.
; The autogenerated assertions below expect the optimized body to contain no
; alloca, loop or call: only a load, or, zext, add, store and ret remain.
define void @wibble(ptr %arg) personality ptr null {
|
|
; CHECK-LABEL: @wibble(
|
|
; CHECK-NEXT: bb:
|
|
; CHECK-NEXT: [[I10_3_I_PRE:%.*]] = load i8, ptr [[ARG:%.*]], align 1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = or i8 [[I10_3_I_PRE]], 1
|
|
; CHECK-NEXT: [[I1_SROA_0_0_VEC_EXTRACT:%.*]] = zext i8 [[TMP0]] to i32
|
|
; CHECK-NEXT: [[I4_I_I:%.*]] = add nuw nsw i32 [[I1_SROA_0_0_VEC_EXTRACT]], 1
|
|
; CHECK-NEXT: store i32 [[I4_I_I]], ptr [[ARG]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
bb:
|
|
; The allocated type is a nest of zero-length arrays and the element count is
; 0, yet the pointer is accessed as a %t0 below (artifact of test reduction,
; presumably; keep as-is).
%i = alloca [0 x [0 x [0 x [0 x [0 x [0 x %t0]]]]]], i32 0, align 8
|
|
; Local buffer holding 16 x i32.
%i1 = alloca %t1, align 4
|
|
; Slot 0 of the record: the caller-provided pointer.
store ptr %arg, ptr %i, align 8
|
|
%i2 = getelementptr %t0, ptr %i, i64 0, i32 1
|
|
; Slot 1 of the record: the address of the local buffer.
store ptr %i1, ptr %i2, align 8
|
|
br label %bb3
|
|
|
|
bb3: ; preds = %bb7, %bb
|
|
; Loop counter: starts at 0, incremented by 1 in %bb7.
%i4 = phi i32 [ 0, %bb ], [ %i8, %bb7 ]
|
|
; Stay in the loop while the counter is below 16 (unsigned compare).
%i5 = icmp ult i32 %i4, 16
|
|
br i1 %i5, label %bb7, label %bb6
|
|
|
|
bb6: ; preds = %bb3
|
|
; Loop exit: hand the record and the original pointer to @baz.
call void @baz(ptr %i, ptr %arg)
|
|
ret void
|
|
|
|
bb7: ; preds = %bb3
|
|
; Loop body: one @foo call per counter value.
call void @foo(ptr %i, i32 %i4)
|
|
%i8 = add i32 %i4, 1
|
|
br label %bb3
|
|
}
|
|
|
|
; Thin wrapper: forwards both arguments to @ham and returns its result.
define linkonce_odr ptr @hoge(ptr %arg, i64 %arg1) {
|
|
bb:
|
|
%i = call ptr @ham(ptr %arg, i64 %arg1)
|
|
ret ptr %i
|
|
}
|
|
|
|
; Helper invoked from @wibble's loop with the record %arg and an index %arg1.
; Iterates %i3 = 3, 2, 1, 0. Each pass loads the byte at index %i3 + %arg1
; from the slot-0 pointer (address computed by @hoge), ORs it with 1,
; zero-extends it to i32 and stores it to element %arg1 of the slot-1 pointer
; (address computed by @foo.1).
define linkonce_odr void @foo(ptr %arg, i32 %arg1) {
|
|
bb:
|
|
; Slot 0 of the record: base pointer for the byte loads below.
%i = load ptr, ptr %arg, align 8
|
|
br label %bb2
|
|
|
|
bb2: ; preds = %bb6, %bb
|
|
; Down-counter: starts at 3, decremented by 1 in %bb6.
%i3 = phi i32 [ 3, %bb ], [ %i17, %bb6 ]
|
|
; Continue while the counter is still >= 0 (signed compare against -1).
%i4 = icmp sgt i32 %i3, -1
|
|
br i1 %i4, label %bb6, label %bb5
|
|
|
|
bb5: ; preds = %bb2
|
|
ret void
|
|
|
|
bb6: ; preds = %bb2
|
|
; Byte index = inner counter + outer index, sign-extended to i64.
%i7 = add i32 %i3, %arg1
|
|
%i8 = sext i32 %i7 to i64
|
|
%i9 = call ptr @hoge(ptr %i, i64 %i8)
|
|
%i10 = load i8, ptr %i9, align 1
|
|
; Slot 1 of the record is reloaded on every pass.
%i11 = getelementptr %t0, ptr %arg, i64 0, i32 1
|
|
%i12 = load ptr, ptr %i11, align 8
|
|
; The destination element index depends only on %arg1, not on %i3.
%i13 = sext i32 %arg1 to i64
|
|
%i14 = call ptr @foo.1(ptr %i12, i64 %i13)
|
|
%i15 = or i8 %i10, 1
|
|
%i16 = zext i8 %i15 to i32
|
|
store i32 %i16, ptr %i14, align 4
|
|
%i17 = add i32 %i3, -1
|
|
br label %bb2
|
|
}
|
|
|
|
; Thin wrapper: forwards both arguments to @pluto.
define linkonce_odr void @baz(ptr %arg, ptr %arg1) {
|
|
bb:
|
|
call void @pluto(ptr %arg, ptr %arg1)
|
|
ret void
|
|
}
|
|
|
|
; Thin wrapper: forwards both arguments to @baz.2 and returns its result.
define linkonce_odr ptr @foo.1(ptr %arg, i64 %arg1) {
|
|
bb:
|
|
%i = call ptr @baz.2(ptr %arg, i64 %arg1)
|
|
ret ptr %i
|
|
}
|
|
|
|
; Returns the address of i32 element %arg1 of %arg, treating %arg as a
; [16 x i32]. The index is a runtime value here (a variable-offset GEP).
define linkonce_odr ptr @baz.2(ptr %arg, i64 %arg1) {
|
|
bb:
|
|
%i = getelementptr [16 x i32], ptr %arg, i64 0, i64 %arg1
|
|
ret ptr %i
|
|
}
|
|
|
|
; Loads the pointer stored in field 1 of %arg (viewed as a %t2), reads the i32
; it points to, adds 1 and stores the result to %arg1.
; NOTE(review): the caller chain passes a record written as %t0; field 1 of
; %t2 presumably lands at the same offset as slot 1 of %t0 under this data
; layout - confirm.
define linkonce_odr void @pluto(ptr %arg, ptr %arg1) {
|
|
bb:
|
|
%i = getelementptr %t2, ptr %arg, i64 0, i32 1
|
|
%i2 = load ptr, ptr %i, align 8
|
|
%i3 = load i32, ptr %i2, align 4
|
|
%i4 = add i32 %i3, 1
|
|
store i32 %i4, ptr %arg1, align 4
|
|
ret void
|
|
}
|
|
|
|
; Returns the address of byte %arg1 of %arg, treating %arg as a [64 x i8].
; The index is a runtime value here (a variable-offset GEP).
define linkonce_odr ptr @ham(ptr %arg, i64 %arg1) {
|
|
bb:
|
|
%i = getelementptr [64 x i8], ptr %arg, i64 0, i64 %arg1
|
|
ret ptr %i
|
|
}
|